library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.4
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Background

This file is for analysis of several coronavirus data sources focused on the US in 2020. Data include areas such as cases, hospitalizations, and deaths tracked to cornavirus, as well as all-cause deaths.

The previous version of this file includes many exploratory analysis components. This version is designed with the following portions of the previous code:

  1. Functions
  2. Run key analyses
  3. Save and load key files

COVID Tracking Project

The first section is for analysis of data from The COVID Tracking Project. This file contains data on positive tests, hospitalizations, deaths, and the like for coronavirus cases in the US. Downloaded data are unique by state and date.

Key Functions (COVID Tracking Project)

The previous analysis made frequent use of the functional form. The functions for downloading and analysizing data from the COVID Tracking Project include:

# Function to extract and format key state data
getStateData <- function(df=usmap::statepop, 
                         renameVars=c("abbr"="state", "full"="name", "pop_2015"="pop"), 
                         keepVars=c("state", "name", "pop")
                         ) {
    
    # FUNCTION ARGUMENTS:
    # df: the data frame containing state data
    # renameVars: variables to be renamed, using named list with format "originalName"="newName"
    # keepVars: variables to be kept in the final file
    
    # Rename variables where appropriate
    names(df) <- ifelse(is.na(renameVars[names(df)]), names(df), renameVars[names(df)])
    
    # Return file with only key variables kept
    df %>%
        select_at(vars(all_of(keepVars)))
    
}



# Function to read in the raw coronavirus data file (assume it is already downloaded)
readCVData <- function(fileName,
                       showGlimpse=TRUE,
                       uqVars=c("date", "state"),
                       errDups=TRUE
                       ) {

    # FUNCTION ARGUMENTS
    # fileName: location of the downloded CSV file from COVID Tracking Project
    # showGlimpse: boolean, whether to run glimpse() on the file
    # uqVars: variables that the file is expected to be unique by
    # errDups: boolean, whether to error out if uniqueness is violated
    
    # Read in the file and convert the 'date' from double to date
    cvData <- readr::read_csv(fileName) %>%
        mutate(date=lubridate::ymd(date))
    
    # See a sample of the data
    if (showGlimpse) glimpse(cvData)

    # Check that the data are unique by date and state
    nDups <- cvData %>%
        select_at(vars(all_of(uqVars))) %>%
        anyDuplicated()
    
    # Inform of the uniqueness check results
    if (nDups==0) {
        cat("\nFile is unique by:", uqVars, "and has dimensions:", dim(cvData), "\n")
    } else {
        cat("\nUniqueness check failed, file has duplicates by:", uqVars, "\n")
        if (errDups) stop("Fix and re-run")
    }
    
    # Return the file
    cvData
    
}


# Function to select relevant variables and observations, and report on control totals
processCVData <- function(dfFull, 
                          varsKeep=c("date", "state", "positiveIncrease", "deathIncrease"), 
                          varsRename=c("positiveIncrease"="cases", "deathIncrease"="deaths"), 
                          stateList=c(state.abb, "DC")
                          ) {
    
    # FUNCTION ARGUMENTS
    # dfFull: the full data file originally loaded
    # varsKeep: variables to keep from the full file
    # varsRename: variables to be renamed, using a named vector of form originalName=newName
    # stateList: variables for filtering state (NULL means do not run any filters)
    
    # Select only the key variables
    df <- dfFull %>%
        select_at(vars(all_of(varsKeep)))
    
    # Apply the renaming of variables
    names(df) <- ifelse(is.na(varsRename[names(df)]), names(df), varsRename[names(df)])

    # Designate each record as being either a valid state or not
    if (!is.null(stateList)) {
        df <- df %>%
            mutate(validState=state %in% stateList)
    } else {
        df <- df %>%
            mutate(validState=TRUE)
    }
    
    # Summarize the control totals for the data, based on whether the state is valid
    cat("\n\nControl totals - note that validState other than TRUE will be discarded\n\n")
    df %>%
        mutate(n=1) %>%
        group_by(validState) %>%
        summarize_if(is.numeric, sum) %>%
        print()
    
    # Return the file, filtered to where validState is TRUE, and deleting variable validState
    df %>%
        filter(validState) %>%
        select(-validState)

}



# Helper function to create per capita metrics
helperPerCapita <- function(df, 
                            origVar, 
                            newName,
                            byVar="state",
                            popVar="pop",
                            popData=stateData,
                            mult=1000000
                            ) {
    
    # FUNCTION ARGUMENTS:
    # df: the data frame currently being processed
    # origVar: the variables to be converted to per capita
    # newName: the new per capita variable name
    # byVar: the variable that will be merged by
    # popVar: the name of the population variable in the popData file
    # popData: the file containing the population data
    # mult: the multiplier, so that the metric is "per mult people"
    
    # Create the per capita variable
    df %>%
        inner_join(select_at(popData, vars(all_of(c(byVar, popVar)))), by=byVar) %>%
        mutate(!!newName:=mult*get(origVar)/get(popVar)) %>%
        select(-all_of(popVar))
    
}


# Helper function to create rolling aggregates
helperRollingAgg <- function(df, 
                             origVar, 
                             newName,
                             func=zoo::rollmean,
                             k=7, 
                             fill=NA, 
                             ...
                             ) {
    
    # FUNCTION ARGUMENTS:
    # df: the data frame containing the data
    # origVar: the original data column name
    # newName: the new variable column name
    # func: the function to be applied (zoo::rollmean will be by far the most common)
    # k: the periodicity (k=7 is rolling weekly data)
    # fill: how to fill leading.trailing data to maintain the same vector lengths
    # ...: any other arguments to be passed to func
    
    # Create the appropriate variable
    df %>%
        mutate(!!newName:=func(get(origVar), k=k, fill=fill, ...))
    
}



# Function to add per capita and rolling to the base data frame
# Updated function to take an arbitrary number of variables and convert them
helperMakePerCapita <- function(df, 
                                mapVars=c("cases"="cpm", "deaths"="dpm"),
                                popData=stateData,
                                k=7
                                ) {
    
    # FUNCTION ARGUMENTS:
    # df: the initial data frame for conversion
    # mapVars: named vector of variables to be converted 'original name'='converted name'
    # k: the rolling time period to use
    
    # Create the variables for per capita
    for (origVar in names(mapVars)) {
        df <- df %>% 
            helperPerCapita(origVar=origVar, newName=mapVars[origVar], popData=popData)
    }
    # Arrange the data by date in preparation for rolling aggregations
    df <- df %>% 
        group_by(state) %>% 
        arrange(date)
    # Create the rolling variables
    for (newVar in mapVars) {
        df <- df %>% 
            helperRollingAgg(origVar=newVar, newName=paste0(newVar, k), k=k)
    }

    # Return the updated data frame, ungrouped
    df %>%
        ungroup()
    
}



# Function to create side-by-side plots for a deaths and cases metric
# Data in df will be aggregated to be unique by byVar using aggFunc
helperBarDeathsCases <- function(df, 
                                 numVars,
                                 title="",
                                 xVar="state",
                                 fillVar=NULL,
                                 aggFunc=sum, 
                                 mapper=varMapper
                                 ) {
    
    # FUNCTION ARGUMENTS:
    # df: the data frame containing the data
    # numVars: the relevant numeric variables for plotting
    # title: plot title, default is nothing
    # xVar: the x-axis variable for plotting
    # fillVar: the variable that will color the bars in the final plot (NULL means use "lightblue" for all)
    # aggFunc: the aggregate function (will be applied to create data unique by byVar)
    # mapper: mapping file to convert x/y variables to descriptive axes (named vector "variable"="label")
    
    # OVERALL FUNCTION PROCESS:
    # 1.  Variables in numVar are aggregated by aggFunc to be unique by c(xVar, fillVar)
    # 2.  Data are pivoted longer
    # 3.  Bar charts are created, with coloring by fillVar if provided
    
    # Create the byVar for summing
    byVar <- xVar
    if (!is.null(fillVar)) { byVar <- c(byVar, fillVar) }
    
    # Process the data and create the plot
    p1 <- df %>%
        select_at(vars(all_of(c(byVar, numVars)))) %>%
        group_by_at(vars(all_of(byVar))) %>%
        summarize_all(aggFunc) %>%
        pivot_longer(-all_of(byVar)) %>%
        ggplot(aes(x=fct_reorder(get(xVar), value, .fun=min), y=value)) + 
        coord_flip() + 
        facet_wrap(~mapper[name], scales="free_x") + 
        labs(x="", y="", title=title) + 
        if (is.null(fillVar)) geom_col(fill="lightblue") else geom_col(aes_string(fill=fillVar))
    
    # Print the plot
    print(p1)
    
}



# Function to assess state data (no segments created yet)
assessStateData <- function(df, 
                            titleStem="Coronavirus burden by state", 
                            cfrEst=0.005,
                            mapper=varMapper
                            ) {
    
    # FUNCTION ARGUMENTS:
    # df: the data frame containing the state-level data
    # titleStem: the main title description, with (total) or (per capita) appended
    # cfrEst: the estimated case fatality rate (CFR); a dashed abline will be plotted at this slope
    # mapper: mapping file to convert x/y variables to descriptive axes (named vector "variable"="label")
    
    # Plot cases and deaths by state, once for overall and once per capita
    helperBarDeathsCases(df, numVars=c("deaths", "cases"), title=paste0(titleStem, " (total)"))
    helperBarDeathsCases(df, numVars=c("dpm", "cpm"), title=paste0(titleStem, " (per capita)"))

    # Disease burden by state, per capita
    p1 <- df %>%
        group_by(state) %>%
        summarize(cpm=sum(cpm), dpm=sum(dpm)) %>%
        ggplot(aes(x=cpm, y=dpm)) +
        geom_text(aes(label=state)) + 
        labs(x=mapper["cpm"], 
             y=mapper["dpm"], 
             title="Deaths vs. cases by state (per million people)", 
             subtitle=paste0("Dashed line is a CFR of ", 
                             round(100*cfrEst, 1), 
                             "% (states far from this may have case counting issues)"
                             )
             ) + 
        geom_abline(slope=cfrEst, lty=2)
    print(p1)
    
    # Total disease burden nationally by day, not using functional form
    p2 <- df %>%
        select(date, cases, deaths) %>%
        group_by(date) %>%
        summarize_if(is.numeric, sum) %>%
        ungroup() %>%
        helperRollingAgg(origVar="cases", newName="casesroll7") %>%
        helperRollingAgg(origVar="deaths", newName="deathsroll7") %>%
        select(-cases, -deaths) %>%
        pivot_longer(-date) %>%
        filter(!is.na(value)) %>%
        ggplot(aes(x=date, y=value)) + 
        geom_line() +
        facet_wrap(~varMapper[name], scales="free_y") + 
        labs(x="", 
             y="",
             title=titleStem
             )
    print(p2)
    
}



# Function to create an elbow plot for various numbers of clusters in the data
helperElbow <- function(mtx, 
                        testCenters, 
                        iter.max, 
                        nstart, 
                        silhouette=FALSE
                        ) {
    
    # FUNCTION ARGUMENTS:
    # mtx: a numeric matrix, or an object that can be coerced to a numeric matrix (no character fields)
    # testCenters: integer vector for the centers to be tested
    # iter.max: parameter passed to kmeans
    # nstart: parameter passed to kmeans
    # silhouette: whether to calculate the silhouette score
    
    # Create an object for storing tot.withinss and silhouetteScore
    totWithin <- vector("numeric", length(testCenters))
    silhouetteScore <- vector("numeric", length(testCenters))
    
    # Create the distancing data (required for silhouette score)
    if (silhouette) distData <- dist(mtx)
    
    # Run k-means for every value in testCenters, and store $tot.withinss (and silhouetteScore, if requested)
    n <- 1
    for (k in testCenters) {
        km <- kmeans(mtx, centers=k, iter.max=iter.max, nstart=nstart)
        totWithin[n] <- km$tot.withinss
        if (silhouette & (k > 1)) silhouetteScore[n] <- mean(cluster::silhouette(km$cluster, distData)[, 3])
        n <- n + 1
    }
    
    # Create the elbow plot
    p1 <- tibble::tibble(n=testCenters, wss=totWithin) %>%
        ggplot(aes(x=n, y=wss)) + 
        geom_point() + 
        geom_line() + 
        geom_text(aes(y=wss + 0.05*max(totWithin), x=n+0.2, label=round(wss, 1))) + 
        labs(x="Number of segments", y="Total Within Sum-Squares", title="Elbow plot") + 
        ylim(c(0, NA))
    
    # Create the silhouette plot if requested
    if (silhouette) {
        p2 <- tibble::tibble(n=testCenters, ss=silhouetteScore) %>%
            ggplot(aes(x=n, y=ss)) + 
            geom_point() + 
            geom_line() + 
            geom_text(aes(y=ss + 0.05*max(silhouetteScore), x=n+0.2, label=round(ss, 1))) + 
            labs(x="Number of segments", y="Mean silhouette width", title="Silhouette plot") + 
            ylim(c(-1, NA))
        gridExtra::grid.arrange(p1, p2, nrow=1)
    } else {
        print(p1)
    }
    
}


# Function to create clusters for the state data (requires all data from same year, as currently true)
clusterStates <- function(df, 
                          caseVar="cpm", 
                          deathVar="dpm",
                          shapeFunc=lubridate::month, 
                          minShape=NULL, 
                          minDeath=0,
                          minCase=0,
                          ratioTotalvsShape=1, 
                          ratioDeathvsCase=1, 
                          hierarchical=TRUE, 
                          hierMethod="complete", 
                          nCenters=3, 
                          iter.max=10,
                          nstart=1,
                          testCenters=NULL,
                          returnList=FALSE, 
                          seed=NULL
                          ) {
    
    # FUNCTION ARGUMENTS:
    # df: the data frame containing cases and deaths data
    # caseVar: the variable containing the cases per capita data
    # deathVar: the variable containing the deaths per capita data
    # shapeFunc: the function to be used for creating the shape of the curve
    # minShape: the minimum value to be used for shape (to avoid very small amounts of data in Jan/Feb)
    #           NULL means keep everything
    # minDeath: use this value as a floor for the death metric when calculating shape
    # minCase: use this metric as a floor for the case metric when calculating shape
    # ratioTotalvsShape: amount of standard deviation to be kept in total variable vs shape variables
    # ratioDeathvsCase: amount of standard deviation to be kept in deaths vs cases 
    #                   (total death data will be scaled to have sd this many times higher than cases)
    #                   (death percentages by time period will be scaled directly by this amount)
    # hierarchical: boolean, if TRUE run hierarchical clustering, otherwise run k-means clustering
    #               only hierarchical clustering is currently implemented
    # hierMethod: the method for hierarchical clustering (e.g., 'complete' or 'single')
    # nCenters: the number of centers to use for kmeans clustering
    # testCenters: integer vector of centers to test (will create an elbow plot); NULL means do not test
    # iter.max: maximumum number of kmeans iterations (default in kmeans algorithm is 10)
    # nstart: number of random sets chosen for kmeans (default in kmeans algorithm is 1)
    # returnList: boolean, if FALSE just the cluster object is returned
    #                      if TRUE, a list is returned with dfCluster and the cluster object
    # seed: set the seed to this value (NULL means no seed)
    
    # Extract key information (aggregates and by shapeFunc for each state)
    df <- df %>%
        select_at(vars(all_of(c("date", "state", caseVar, deathVar)))) %>%
        purrr::set_names(c("date", "state", "cases", "deaths")) %>%
        mutate(timeBucket=shapeFunc(date)) %>%
        group_by(state, timeBucket) %>%
        summarize(cases=sum(cases), deaths=sum(deaths)) %>%
        ungroup()
    
    # Limit to only relevant time buckets if requested
    if (!is.null(minShape)) {
        df <- df %>%
            filter(timeBucket >= minShape)
    }
    
    # Extract an aggregate by state, scaled so that they have the proper ratio
    dfAgg <- df %>%
        group_by(state) %>%
        summarize(totalCases=sum(cases), totalDeaths=sum(deaths)) %>%
        ungroup() %>%
        mutate(totalDeaths=ratioDeathvsCase*totalDeaths*sd(totalCases)/sd(totalDeaths))
    
    # Extract the percentages (shapes) by month, scaled so that they have the proper ratio
    dfShape <- df %>%
        pivot_longer(-c(state, timeBucket)) %>%
        group_by(state, name) %>%
        mutate(tot=pmax(sum(value), ifelse(name=="deaths", minDeath, minCase)), 
               value=ifelse(name=="deaths", ratioDeathvsCase, 1) * value / tot) %>%
        select(-tot) %>%
        pivot_wider(state, names_from=c(name, timeBucket), values_from=value) %>%
        ungroup()
    
    # Function to calculate SD of a subset of columns
    calcSumSD <- function(df) {
        df %>% 
            ungroup() %>% 
            select(-state) %>% 
            summarize_all(.funs=sd) %>% 
            as.vector() %>% 
            sum()
    }
    
    # Down-weight the aggregate data so that there is the proper sum of sd in aggregates and shapes
    aggSD <- calcSumSD(dfAgg)
    shapeSD <- calcSumSD(dfShape)
    dfAgg <- dfAgg %>%
        mutate_if(is.numeric, ~. * ratioTotalvsShape * shapeSD / aggSD)

    # Combine so there is one row per state
    dfCluster <- dfAgg %>%
        inner_join(dfShape, by="state")
    
    # convert 'state' to rowname
    keyData <- dfCluster %>% column_to_rownames("state")
    
    # Create hierarchical segments or kmeans segments
    if (hierarchical) {
        objCluster <-  hclust(dist(keyData), method=hierMethod)
        plot(objCluster)
    } else {
        # Create an elbow plot if testCenters is not NULL
        if (!is.null(testCenters)) {
            helperElbow(keyData, testCenters=testCenters, iter.max=iter.max, nstart=nstart, silhouette=TRUE)
        }
        # Create the kmeans cluster object, setting a seed if requested
        if (!is.null(seed)) set.seed(seed)
        objCluster <- kmeans(keyData, centers=nCenters, iter.max=iter.max, nstart=nstart)
        cat("\nCluster means and counts\n")
        n=objCluster$size %>% cbind(objCluster$centers) %>% round(2) %>% t() %>% print()
    }

    # Return the data and object is a list if returnList is TRUE, otherwise return only the clustering object
    if (!isTRUE(returnList)) {
        objCluster
    } else {
        list(objCluster=objCluster, dfCluster=dfCluster)
    }
    
}



# Helper function to assess 30-day change vs. total
helperRecentvsTotal <- function(df, 
                                xVar, 
                                yVar,
                                title,
                                recencyDays=30, 
                                ablineSlope=0.5, 
                                mapper=varMapper, 
                                labelPlot=TRUE,
                                printPlot=TRUE
                                ) {
    
    # FUNCTION ARGUMENTS:
    # df: the tibble containing data by state by day
    # xVar: the x-variable
    # yVar: the y-variable
    # title: the plot title
    # recencyDays: number of days to consider as recent
    # ablineSlope: dashed line will be drawn with this slope and intercept 0
    # mapper: mapping file to convert x/y variables to descriptive axes (named vector "variable"="label")
    # labelPlot: boolean, whether to show the labels for each point
    # printPlot: boolean, whether to display the plot (if FALSE, plot object is returned)
    
    # Get the most date cutoff
    dateCutoff <- df %>% pull(date) %>% max() - recencyDays + 1
    cat("\nRecency is defined as", format(dateCutoff, "%Y-%m-%d"), "through current\n")
    
    # Create the plot
    p1 <- df %>%
        mutate(newCases=ifelse(date >= dateCutoff, cases, 0), 
               newDeaths=ifelse(date >= dateCutoff, deaths, 0), 
               newcpm=ifelse(date >= dateCutoff, cpm, 0), 
               newdpm=ifelse(date >= dateCutoff, dpm, 0)
               ) %>%
        group_by(state, cluster) %>%
        summarize_if(is.numeric, .funs=sum) %>%
        ungroup() %>%
        ggplot(aes_string(x=xVar, y=yVar)) + 
        labs(x=mapper[xVar], 
             y=mapper[yVar], 
             title=title, 
             subtitle=paste0("Dashed line represents ", 
                             round(100*ablineSlope), 
                             "% of total is new in last ", 
                             recencyDays,
                             " days"
                             )
             ) + 
        geom_abline(lty=2, slope=ablineSlope) + 
        lims(x=c(0, NA), y=c(0, NA)) + 
        theme(legend.position = "bottom")
    
    # Add the appropriate geom (scatterplot if labelPlot is FALSE)
    if (labelPlot) p1 <- p1 + geom_text(aes(color=cluster, label=state))
    else p1 <- p1 + geom_point(aes(color=cluster), alpha=0.5)
    
    if (isTRUE(printPlot)) {
        print(p1)
    } else {
        p1
    }

}


# Function to plot cluster vs. individual elements on a key metric
helperTotalvsElements <- function(df, 
                                  keyVar, 
                                  title,
                                  aggAndTotal=TRUE,
                                  pctRibbon=0.8,
                                  aggFunc=if(aggAndTotal) median else mean, 
                                  mapper=varMapper, 
                                  facetScales="free_y", 
                                  printPlot=TRUE
                                  ) {

    # FUNCTION ARGUMENTS:
    # df: the data frame containing n-day rolling averages
    # keyVar: the variable to be plotted
    # title: the plot title
    # aggAndTotal: boolean, whether to plot every individual observation along with the cluster aggregate
    # pctRibbon: if aggAndTotal is FALSE, a ribbon covering this percentage of the data will be plotted
    # aggFunc: how to aggregate the elements to the segment
    #          CAUTION that this is an aggregate of averages, rather than a population-weighted aggregate
    # mapper: the variable mapping file to get the appropriate label for keyVar
    # facetScales: the scaling for the facets - "free_y" to let them all float, "fixed" to have them the same
    # printPlot: boolean, if TRUE print the plot (otherwise return the plot object)

    # Create an appropriate subtitle
    subtitle <- if(facetScales=="free_y") {
        "Caution that each facet has its own y axis with different scales"
    } else if (facetScales=="fixed") { 
        "All facets are on the same scale"
    } else {
        "Update subtitle code in function helperTotalvsElements"
    }

    # Create an appropriate caption
    caption <- if(aggAndTotal) {
        "Cluster aggregate is median, weighting each observation equally\n(NOT population-weighted)"
    } else {
        paste0("1. Cluster aggregate is mean over all observations (NOT population-weighted)\n2. Ribbons reflect range covering ", round(pctRibbon*100), "% of observations")
    }
    
    # Create the plots for segment-level data
    p1 <- df %>%
        rbind(mutate(., state="cluster")) %>%
        group_by(state, cluster, date) %>%
        summarize_at(vars(all_of(keyVar)), .funs=aggFunc) %>%
        ungroup() %>%
        filter(!is.na(get(keyVar))) %>%
        ggplot(aes_string(x="date")) + 
        geom_line(data=~filter(., state == "cluster"), 
                  aes(y=get(keyVar), group=state, color=cluster), 
                  lwd=1.5
                  ) + 
        facet_wrap(~cluster, scales=facetScales) + 
        labs(x="", 
             y=mapper[keyVar], 
             title=title, 
             subtitle=subtitle,
             caption=caption
             ) + 
        ylim(c(0, NA)) + 
        theme(legend.position="bottom")
    
    # Add an appropriate individual metric (either every observation or quantiles)
    if (aggAndTotal) {
        p1 <- p1 + 
            geom_line(data=~filter(., state != "cluster"), 
                      aes(y=get(keyVar), group=state), 
                      alpha=0.25
                      )
    } else {
        dfRibbon <- df %>%
            filter(!is.na(get(keyVar))) %>%
            group_by(cluster, date) %>%
            summarize(ylow=quantile(get(keyVar), 0.5-0.5*pctRibbon), 
                      yhigh=quantile(get(keyVar), 0.5+0.5*pctRibbon)
                      )
        p1 <- p1 + 
            geom_ribbon(data=dfRibbon, 
                        aes(ymin=ylow, ymax=yhigh), 
                        alpha=0.25
                        )
    }
    
    # Print plot if requested, otherwise return it
    if (isTRUE(printPlot)) {
        print(p1)
    } else {
        p1
    }
    
}



# Function to assess clusters
assessClusters <- function(clusters, 
                           dfState=stateData, 
                           dfBurden=cvFilteredPerCapita,
                           thruLabel="Aug 20, 2020",
                           isCounty=FALSE,
                           plotsTogether=FALSE, 
                           clusterPlotsTogether=plotsTogether,
                           recentTotalTogether=plotsTogether, 
                           clusterAggTogether=plotsTogether
                           ) {
    
    # FUNCTION ARGUMENTS:
    # clusters: the named vector containing the clusters by state
    # dfState: the file containing the states and populations
    # dfBurden: the data containing the relevant per capita burden statistics by state-date
    # thruLabel: label for plots for 'data through'
    # isCounty: boolean, is this a plot of county-level data that have been named 'state'?
    #           FALSE means that it is normal state-level data
    # plotsTogether: boolean, should plots be consolidated on fewer pages?
    # clusterPlotsTogether: boolean, should plots p1-p4 be consolidated?
    # recentTotalTogether: boolean, should recent total plots p7-p8 be consolidated?
    # clusterAggTogether: boolean, should aggregate plots p9/p11 and p10/p12 be consolidated?
    
    # Attach the clusters to the state population data
    dfState <- as.data.frame(clusters) %>%
        set_names("cluster") %>%
        rownames_to_column("state") %>%
        inner_join(dfState, by="state") %>%
        mutate(cluster=factor(cluster))
    
    # Plot the segments on a state map (only if !isCounty)
    if (isCounty) {
        p1 <- dfState %>%
            count(cluster) %>%
            ggplot(aes(x=fct_rev(cluster), y=n)) + 
            geom_col(aes(fill=cluster)) +
            geom_text(aes(y=n/2, label=n)) +
            coord_flip() + 
            labs(x="", y="# Counties", title="Membership by segment")
    } else {
        p1 <- usmap::plot_usmap(regions="states", data=dfState, values="cluster") + 
            scale_fill_discrete("cluster") + 
            theme(legend.position="right")
    }
    
    # Plot the population totals by segment
    # Show population totals by cluster
    p2 <- dfState %>%
        group_by(cluster) %>%
        summarize(pop=sum(pop)/1000000) %>%
        ggplot(aes(x=fct_rev(cluster), y=pop)) + 
        geom_col(aes(fill=cluster)) + 
        geom_text(aes(y=pop/2, label=round(pop))) + 
        labs(y="2015 population (millions)", x="Cluster", title="Population by cluster (millions)") +
        coord_flip()
    
    # Plot the rolling 7-day mean dialy disease burden by cluster
    dfPlot <- dfState %>%
        inner_join(dfBurden, by="state") %>%
        tibble::as_tibble()
    
    # Plot the rolling 7-day mean daily disease burden by cluster
    p3 <- dfPlot %>%        
        select(date, cluster, cases=cpm7, deaths=dpm7) %>%
        pivot_longer(-c(date, cluster)) %>%
        filter(!is.na(value)) %>%
        group_by(date, cluster, name) %>%
        summarize(value=median(value)) %>%
        ggplot(aes(x=date, y=value)) +
        geom_line(aes(group=cluster, color=cluster)) +
        facet_wrap(~name, scales="free_y") +
        labs(x="",
             y="Rolling 7-day mean, per million",
             title="Rolling 7-day mean daily disease burden, per million",
             subtitle="Median per day for states assigned to cluster"
             )
    
    # Plot the total cases and total deaths by cluster
    p4 <- dfPlot %>%
        group_by(cluster) %>%
        summarize(cases=sum(cases), deaths=sum(deaths)) %>%
        pivot_longer(-cluster) %>%
        ggplot(aes(x=fct_rev(cluster), y=value/1000)) + 
        geom_col(aes(fill=cluster)) + 
        geom_text(aes(y=value/2000, label=round(value/1000))) +
        coord_flip() + 
        facet_wrap(~varMapper[name], scales="free_x") + 
        labs(x="Cluster", y="Burden (000s)", title="Total cases and deaths by segment")

    # Place the plots together if plotsTogether is TRUE, otherwise just print
    if (isTRUE(clusterPlotsTogether)) {
        gridExtra::grid.arrange(p1, p2, p3, p4, nrow=2, ncol=2)
    } else {
        print(p1); print(p2); print(p3); print(p4)
    }

    # These are relevant and useful only for state-level data
    if (!isCounty) {
        # Plot total cases and total deaths by state, colored by cluster
        helperBarDeathsCases(dfPlot, 
                             numVars=c("cases", "deaths"), 
                             title=paste0("Coronavirus impact by state through ", thruLabel), 
                             xVar=c("state"), 
                             fillVar=c("cluster")
                             )
    
        # Plot cases per million and deaths per million by state, colored by cluster
        helperBarDeathsCases(dfPlot, 
                             numVars=c("cpm", "dpm"), 
                             title=paste0("Coronavirus impact by state through ", thruLabel), 
                             xVar=c("state"), 
                             fillVar=c("cluster")
                             )
    }
    
    # County-level plots will be point-only; state-level plots will be labelled
    # Plot last-30 vs total for cases per million by state, colored by cluster
    p7 <- helperRecentvsTotal(dfPlot, 
                              xVar="cpm", 
                              yVar="newcpm", 
                              title=paste0("Coronavirus burden through ", thruLabel), 
                              labelPlot=!isCounty,
                              printPlot=FALSE
                              )
    
    # Plot last-30 vs total for deaths per million by state, colored by cluster
    p8 <- helperRecentvsTotal(dfPlot, 
                              xVar="dpm", 
                              yVar="newdpm", 
                              title=paste0("Coronavirus burden through ", thruLabel), 
                              labelPlot=!isCounty,
                              printPlot=FALSE
                              )
    
    # Print the plots either as a single page or separately
    if (isTRUE(recentTotalTogether)) {
        gridExtra::grid.arrange(p7, p8, nrow=1)
    } else {
        print(p7); print(p8)
    }
    
    # These are currently only helpful for states (update later to make more useful for counties)
    # Plot the cases per million on a free y-scale and a fixed y-scale
    p9 <- helperTotalvsElements(dfPlot, 
                                keyVar="cpm7", 
                                aggAndTotal=!isCounty,
                                title="Cases per million, 7-day rolling mean", 
                                printPlot=FALSE
                                )
    p10 <- helperTotalvsElements(dfPlot, 
                                 keyVar="cpm7", 
                                 aggAndTotal=!isCounty,
                                 title="Cases per million, 7-day rolling mean", 
                                 facetScales="fixed", 
                                 printPlot=FALSE
                                 )
    
    # Plot the deaths per million on a free y-scale and a fixed y-scale
    p11 <- helperTotalvsElements(dfPlot, 
                                 keyVar="dpm7", 
                                 aggAndTotal=!isCounty,
                                 title="Deaths per million, 7-day rolling mean", 
                                 printPlot=FALSE
                                 )
    p12 <- helperTotalvsElements(dfPlot, 
                                 keyVar="dpm7", 
                                 aggAndTotal=!isCounty,
                                 title="Deaths per million, 7-day rolling mean", 
                                 facetScales="fixed", 
                                 printPlot=FALSE
                                 )
    
    if (isTRUE(clusterAggTogether)) {
        gridExtra::grid.arrange(p9, p11, nrow=1)
        gridExtra::grid.arrange(p10, p12, nrow=1)
    } else {
        print(p9); print(p10); print(p11); print(p12)
    }
    
    # Return the plotting data frame
    dfPlot

}



# Function to create plots of the number hospitalized by state and cluster
plotHospitalized <- function(df, 
                             clusterVector,
                             dfState=stateData, 
                             subT=""
                             ) {
    
    # FUNCTION ARGUMENTS:
    # df: a data frame or tibble containing 'state', 'date', 'hospitalizedCurrently'
    # clusterVector: a named vector of form 'state'='cluster'
    # dfState: a state-level population file containing 'state' and 'pop'
    # subT: a subtitle for the plot
    
    # Create the key plotting data
    plotData <- df %>% 
        inner_join(dfState, by="state") %>% 
        mutate(cluster=factor(clusterVector[state])) %>% 
        filter(!is.na(hospitalizedCurrently)) %>% 
        select(date, state, cluster, hospitalizedCurrently, pop) %>%
        rbind(mutate(., state="Total")) %>%
        group_by(state, cluster, date) %>% 
        summarize(n=n(), 
                  hospitalizedCurrently=sum(hospitalizedCurrently), 
                  pop=sum(pop)
                  ) %>% 
        mutate(hpm=1000000*hospitalizedCurrently/pop) %>%
        helperRollingAgg(origVar="hpm", newName="hpm7") %>%
        ungroup()
    
    # Create the plot
    p1 <- plotData %>%
        filter(!is.na(hpm7)) %>%
        ggplot(aes(x=date, y=hpm7)) + 
        geom_line(data=~filter(., state != "Total"), aes(group=state), alpha=0.25) +
        geom_line(data=~filter(., state == "Total"), aes(group=state, color=cluster), lwd=1.5) + 
        facet_wrap(~cluster, scales="fixed") + 
        ylim(c(0, NA)) + 
        labs(x="", 
             y="Currently Hospitalized 7-day rolling mean (per million)", 
             title="Hospitalized per million by cluster", 
             subtitle=subT
             )
    print(p1)
    
    # Return the plot data
    plotData

}


# Function to download data for COVID Tracking Project
downloadCOVIDbyState <- function(fileName, 
                                 api="https://api.covidtracking.com/v1/states/daily.csv", 
                                 ovrWrite=FALSE
                                 ) {
    
    # COVID Tracking Project API allows data downloads for personal, non-commercial use
    # https://covidtracking.com/data/api
    
    # FUNCTION ARGUMENTS:
    # fileName: the filename that the data will be saved to
    # api: The API link for data downloads
    # ovrWrite: whether to allow overwriting of the existing fileName
    
    # Check whether fileName already exists
    if (file.exists(fileName)) {
        cat("\nFile already exists at:", fileName, "\n")
        if (ovrWrite) cat("Will over-write with current data from", api, "\n")
        else stop("Exiting due to ovrWrite=FALSE and a duplicate fileName\n")
    }
    
    # Download the file 
    download.file(api, destfile=fileName)
    
    # Show statistics on downloaded file
    file.info(fileName)
    
}



# Function to read, convert, and sanity check a downloaded file
readCOViDbyState <- function(fileName, 
                             checkFile=NULL, 
                             controlFields=c("positiveIncrease", "deathIncrease", "hospitalizedCurrently"), 
                             controlBy=c("state")
                             ) {
    
    # FUNCTION ARGUMENTS:
    # fileName: the file name for reading the data
    # checkFile: a file that can be used for comparison purposes (NULL means do not compare to anything)
    # controlFields: fields that will be explicitly checked against checkFile
    # controlBy: level of aggregation at which fields will be explicitly checked against checkFile
    
    # Helper function to check for similarity of key elements
    helperSimilarity <- function(newData, refData, label) {
        cat("\n\nCheckin for similarity of:", label)
        cat("\nIn reference but not in current:", setdiff(refData, newData))
        cat("\nIn current but not in reference:", setdiff(newData, refData))
    }
    
    # Read in the file and convert the numeric date field to date using ymd format
    df <- readr::read_csv(fileName) %>% 
        mutate(date=lubridate::ymd(date))

    # Check that the file is unique by date-state
    if ((df %>% select(date, state) %>% anyDuplicated()) != 0) {
        stop("\nDuplicates by date and state, investigate and fix\n")
    } else {
        cat("\nFile is unique by state and date\n")
    }
    
    # Check for overall control totals in new file
    cat("\n\nOverall control totals in file:\n")
    df %>% 
        summarize_at(vars(all_of(controlFields)), .funs=sum, na.rm=TRUE) %>% 
        print()
    
    # Get control totals by date for new file
    dfByDate <- df %>% 
        group_by(date) %>%
        summarize_at(vars(all_of(controlFields)), .funs=sum, na.rm=TRUE) %>%
        ungroup() %>%
        pivot_longer(-date, values_to="newValue")
        
    # If there is no checkFile, then just produce a plot of the key metrics
    if (is.null(checkFile)) {
        p1 <- dfByDate %>% 
            ggplot(aes(x=date, y=newValue)) + 
            geom_line() + 
            facet_wrap(~name, nrow=1, scales="free_y") + 
            labs(title="Control totals by date for new file (no reference file)", x="", y="Summed Value")
        print(p1)
    } else {
        # Check for similarity of fields, dates, and states
        cat("\n*** COMPARISONS TO REFERENCE FILE:", deparse(substitute(checkFile)))
        helperSimilarity(newData=names(df), refData=names(checkFile), label="column names")
        helperSimilarity(newData=df %>% pull(state) %>% unique(), 
                         refData=checkFile %>% pull(state) %>% unique(), 
                         label="states"
                         )
        helperSimilarity(newData=df %>% pull(date) %>% unique() %>% format("%Y-%m-%d"), 
                         refData=checkFile %>% pull(date) %>% unique() %>% format("%Y-%m-%d"), 
                         label="dates"
                         )
    
        # Check for similarity of control totals by date in files
        checkByDate <- checkFile %>% 
            group_by(date) %>%
            summarize_at(vars(all_of(controlFields)), .funs=sum, na.rm=TRUE) %>%
            ungroup() %>%
            pivot_longer(-date, values_to="oldValue")
        cat("\n\n*** Difference of at least 5 and difference is at least 1%:\n\n")
        dfByDate %>% 
            inner_join(checkByDate) %>%
            filter(abs(newValue-oldValue)>=5, 
                   pmax(newValue, oldValue)>=1.01*pmin(newValue, oldValue)
                   ) %>%
            as.data.frame() %>%
            print()
        p1 <- dfByDate %>% 
            inner_join(checkByDate) %>%
            pivot_longer(-c(date, name), names_to="newOld") %>%
            ggplot(aes(x=date, y=value, group=newOld, color=newOld)) + 
            geom_line() + 
            facet_wrap(~name, nrow=1, scales="free_y") + 
            labs(title="Control totals by date for new and reference file", x="", y="Summed Value")
        print(p1)

        # Check for similarity of control totals by controlBy in files
        dfByControl <- df %>% 
            semi_join(select(checkFile, date), by="date") %>%
            group_by_at(vars(all_of(controlBy))) %>%
            summarize_at(vars(all_of(controlFields)), .funs=sum, na.rm=TRUE) %>%
            ungroup() %>%
            pivot_longer(-all_of(controlBy), values_to="newValue")
        checkByControl <- checkFile %>% 
            group_by_at(vars(all_of(controlBy))) %>%
            summarize_at(vars(all_of(controlFields)), .funs=sum, na.rm=TRUE) %>%
            ungroup() %>%
            pivot_longer(-all_of(controlBy), values_to="oldValue")
        cat("\n\n*** Difference of at least 5 and difference is at least 1%:\n\n")
        dfByControl %>% 
            inner_join(checkByControl) %>%
            filter(abs(newValue-oldValue)>=5, 
                   pmax(newValue, oldValue)>=1.01*pmin(newValue, oldValue)
                   ) %>%
            as.data.frame() %>%
            print()
    }
    
    # Return the processed data file
    df
    
}


# Function to create plots of consolidated metrics
plotConsolidatedMetrics <- function(dfMain, 
                                    dfHosp=NULL, 
                                    varMain=c("state", "cluster", "date", "pop", "cases", "deaths", "hosp"),
                                    varDropHosp=c("n", "pop"), 
                                    joinBy=c("state", "cluster", "date"), 
                                    subT=NULL, 
                                    nrowPlot2=1
                                    ) {
    
    # FUNCTION ARGUMENTS:
    # dfMain: the main file produced by assessClusters(), containing data by state-cluster-date
    # dfHosp: the separate file with hospital data (NULL means data already available in dfMain)
    # varMain: variables to keep from dfMain
    # varDropHosp: variables to drop from dfHosp
    # joinBy: variables for joining dfMain and dfHosp
    # subT: plot subtitle (NULL will use the defaults), 
    # nrowPlot2: number of rows for the facetting to use on plot 2
    
    if (is.null(subT)) {
        subT <- "Cases: new cases, Deaths: new deaths, Hosp: total in hospital (not new)"
    }
    
    # Filter dfMain to include only variables in varMain
    dfMain <- dfMain %>%
        select_at(vars(all_of(varMain)))
    
    # Left join dfMain to dfHosp unless dfHosp is NULL
    if (!is.null(dfHosp)) {
        dfHosp <- dfHosp %>%
            select_at(vars(all_of(names(dfHosp)[!(names(dfHosp) %in% varDropHosp)])))
        dfMain <- dfMain %>%
            left_join(dfHosp, by=all_of(joinBy))
    }
    
    # Check that variables state, cluster, date, pop are all available
    if (!(c("state", "cluster", "date", "pop") %in% names(dfMain) %>% all())) {
        stop("\nFunction requires variables state, cluster, date, and pop after processing\n")
    }
    
    # Create the relevant plotting data
    dfPlot <- dfMain %>%
        pivot_longer(-c(state, cluster, date, pop)) %>%
        filter(!is.na(value)) %>%
        rbind(mutate(., state="cluster")) %>%
        group_by_at(vars(all_of(c(joinBy, "name")))) %>%
        summarize(value=sum(value), pop=sum(pop)) %>%
        mutate(vpm=1000000*value/pop) %>%
        arrange(state, cluster, name, date) %>%
        group_by(state, cluster, name) %>%
        helperRollingAgg(origVar="vpm", newName="vpm7")    

    # Create facetted plots for totals by metric by segment
    p1 <- dfPlot %>%
        filter(!is.na(vpm7)) %>%
        ggplot(aes(x=date, y=vpm7)) + 
        geom_line(data=~filter(., state=="cluster"), aes(group=cluster, color=cluster), lwd=1.5) +
        geom_line(data=~filter(., state!="cluster"), aes(group=state), alpha=0.25) + 
        facet_grid(name ~ cluster, scales="free_y") + 
        labs(x="", 
             y="Rolling 7-day mean per million", 
             title="Key metrics by cluster (7-day rolling mean per million)", 
             subtitle=subT
             ) + 
        scale_x_date(date_breaks="1 months", date_labels="%b") + 
        theme(axis.text.x=element_text(angle=90))
    print(p1)

    # Create all-segment plot by metric
    p2 <- dfPlot %>%
        filter(!is.na(vpm7)) %>%
        ggplot(aes(x=date, y=vpm7)) + 
        geom_line(data=~filter(., state=="cluster"), aes(group=cluster, color=cluster), lwd=1.5) +
        facet_wrap(~ name, scales="free_y", nrow=nrowPlot2) + 
        labs(x="", 
             y="Rolling 7-day mean per million", 
             title="Key metrics by cluster (7-day rolling mean per million)", 
             subtitle=subT
             ) + 
        scale_x_date(date_breaks="1 months", date_labels="%b") + 
        theme(axis.text.x=element_text(angle=90))
    print(p2)

    # Create all-metric plot by segment (define 100% as peak for segment-metric)
    p3 <- dfPlot %>%
        filter(!is.na(vpm7)) %>%
        group_by(state, cluster, name) %>%
        mutate(spm7=vpm7/max(vpm7)) %>%
        ggplot(aes(x=date, y=spm7)) + 
        geom_line(data=~filter(., state=="cluster"), aes(group=name, color=name), lwd=1) +
        facet_wrap(~ cluster, scales="free_y") + 
        labs(x="", 
             y="% of Maximum", 
             title="Key metrics by cluster (% of cluster maximum for variable)", 
             subtitle=subT
             ) + 
        scale_x_date(date_breaks="1 months", date_labels="%b") + 
        scale_color_discrete("variable") +
        theme(axis.text.x=element_text(angle=90))
    print(p3)
    
    # Return the plotting data
    dfPlot
    
}


# Function to convert a file to cumulative totals
makeCumulative <- function(df, 
                           typeVar="name", 
                           typeKeep=c("cases", "deaths", "tests"), 
                           valVar="vpm7", 
                           groupVars=c("state", "cluster", "name"), 
                           arrangeVars=c("date"), 
                           newName="cum7"
                           ) {
    
    # FUNCTION ARGUMENTS:
    # df: data frame containing the metrics
    # typeVar: the variable holding the metric type (default is 'name')
    # typeKeep: the values of typeVar to be kept
    # valVar: the variable holding the metric value (default is 'vpm7')
    # groupVars: groups for calculating cumulative sum
    # arrangeVars: variables to be sorted by before calculating cumulative sum
    # newName: the name for the new variable

    # Create the cumulative data
    dfCum <- df %>%
        filter(get(typeVar) %in% typeKeep, !is.na(get(valVar))) %>%
        arrange_at(vars(all_of(c(groupVars, arrangeVars)))) %>%
        group_by_at(groupVars) %>%
        mutate(!!newName:=cumsum(get(valVar))) %>%
        ungroup()

    # Return the processed data
    dfCum
    
}


# Function to find and flag states that are high on a key value or change in key value
findFlagStates <- function(df, 
                           keyMetricVal, 
                           keyMetricVar="name", 
                           cumVar="cum7", 
                           prevDays=30, 
                           nFlag=5
                           ) {
    
    # FUNCTION ARGUMENTS:
    # df: the data frame containing the cumulative data
    # keyMetricVal: the metric of interest (e.g., "deaths", "cases", "tests")
    # keyMetricVar: the variable name for the column containing the metric of interest
    # cumVar: variable containing the cumulative totals
    # prevDays: the number of days previous to use for defining growth
    # nFlag: the number of states to flag for either total or growth (top-n of each)
    
    # Find top-5 in either total or recent increase
    dfFlag <- df %>%
        filter(get(keyMetricVar)==keyMetricVal, state!="cluster") %>%
        select_at(vars(all_of(c("state", "date", cumVar)))) %>%
        group_by(state) %>%
        summarize(maxVal=max(get(cumVar)), 
                  tminus=sum(ifelse(date==max(date)-lubridate::days(prevDays), get(cumVar), 0))
                  ) %>%
        ungroup() %>%
        mutate(growth=maxVal-tminus, 
               rkTotal=min_rank(-maxVal), 
               rkGrowth=min_rank(-growth), 
               flag=ifelse(pmin(rkTotal, rkGrowth)<=nFlag, 1, 0)
               ) %>%
        arrange(-flag, rkTotal)

    # Return the top values as a vector of states
    dfFlag %>%
        filter(flag==1) %>%
        pull(state)
    
}


# Function to plot cumulative data
plotCumulativeData <- function(df, 
                               keyMetricp2,
                               flagsp2,
                               p2Desc=keyMetricp2,
                               keyVar="cum7", 
                               makep1=FALSE, 
                               makep2=TRUE
                               ) {
    
    # FUNCTION ARGUMENTS:
    # df: the data frame of cumulative data
    # keyMetricp2: the key metric to be plotted in the second plot (e.g., "deaths", "cases", "tests")
    # flagsp2: states to be treated as flagged in the second plot
    # p2Desc: the description to be used in plot 2
    # keyVar: the key variable to plot
    # makep1: boolean, whether to make the first plot
    # makep2: boolean, whether to make the second plot
    
    # Plot the cumulative data by cluster (if flag is set for producing this)
    if (isTRUE(makep1)) {
        p1 <- df %>%
            filter(state=="cluster") %>%
            ggplot(aes(x=date, y=get(keyVar))) + 
            geom_line(aes(group=cluster, color=cluster)) + 
            facet_wrap(~name, nrow=1, scales="free_y") + 
            scale_x_date(date_breaks="1 months", date_labels="%m") + 
            labs(x="Month", 
                 y="Cumulative Burden (per million)", 
                 title="Cumulative burden by segment (per million)"
                 )
        print(p1)
    }
    
    
    # Plot the cumulative totals over time for one metric, and flag the highest
    if (isTRUE(makep2)) {
        p2 <- df %>%
            filter(state!="cluster", name==keyMetricp2) %>%
            mutate(bold=ifelse(state %in% flagsp2, 1, 0)) %>%
            ggplot(aes(x=date, y=get(keyVar))) + 
            geom_line(aes(group=state, color=cluster, alpha=0.4+0.6*bold, size=0.5+0.5*bold)) + 
            geom_text(data=~filter(., bold==1, date==max(date)), 
                      aes(x=date+lubridate::days(10), 
                          label=paste0(state, ": ", round(get(keyVar), 0)), 
                          color=cluster
                          ), 
                      size=3, 
                      fontface="bold"
                      ) +
            scale_x_date(date_breaks="1 months", date_labels="%m") + 
            scale_alpha_identity() +
            scale_size_identity() +
            labs(x="Month", 
                 y=paste0("Cumulative ", p2Desc, " (per million)"), 
                 title=paste0("Cumulative coronavirus ", p2Desc, " by state (per million)"), 
                 subtitle="Top 5 states for total and growth rate are bolded and labelled"
                 )
        print(p2)
    }
    
}

Running Key Analyses (COVID Tracking Project)

The data from COVID Tracking Project can be loaded and analyzed using the functions above, a variable mapping file, and a main function that calls the other functions and returns a list:

# STEP 0: Create a variable mapping file
varMapper <- c("cases"="Cases", 
               "newCases"="Increase in cases, most recent 30 days",
               "casesroll7"="Rolling 7-day mean cases", 
               "deaths"="Deaths", 
               "newDeaths"="Increase in deaths, most recent 30 days",
               "deathsroll7"="Rolling 7-day mean deaths", 
               "cpm"="Cases per million",
               "cpm7"="Cases per day (7-day rolling mean) per million", 
               "newcpm"="Increase in cases, most recent 30 days, per million",
               "dpm"="Deaths per million", 
               "dpm7"="Deaths per day (7-day rolling mean) per million", 
               "newdpm"="Increase in deaths, most recent 30 days, per million", 
               "hpm7"="Currently Hospitalized per million (7-day rolling mean)", 
               "tpm"="Tests per million", 
               "tpm7"="Tests per million per day (7-day rolling mean)"
               )


# Function to download/load, process, segment, and analyze data from COVID Tracking Project
readRunCOVIDTrackingProject <- function(thruLabel, 
                                        downloadTo=NULL, 
                                        readFrom=downloadTo, 
                                        compareFile=NULL,
                                        dfPerCapita=NULL,
                                        useClusters=NULL,
                                        hierarchical=TRUE,
                                        returnList=!hierarchical, 
                                        kCut=6,
                                        reAssignState=vector("list", 0),
                                        makeCumulativePlots=TRUE,
                                        ...
                                        ) {
    
    # FUNCTION ARGUMENTS:
    # thruLabel: the label for when the data are through (e.g., "Aug 30, 2020")
    # donwloadTo: download the most recent COVID Tracking Project data to this location
    #             NULL means do not download any data
    # readFrom: location for reading in the COVID Tracking Project data (defaults to donwloadTo)
    # compareFile: name of the file to use for comparisons when reading in raw data (NULL means no comparison)
    # dfPerCapita: file can be passed directly, which bypasses the loading and processing steps
    # useClusters: file containing clusters by state (NULL means make the clusters from the data)
    # hierarchical: boolean, should hierarchical clusters be produced (if FALSE, will be k-means)?
    # returnList: boolean, should a list be returned or just the cluster object?
    # kCut: number of segments when cutting the hierarchical tree
    # reAssignState: mapping file for assigning a state to another state's cluster
    #                format list("stateToChange"="stateClusterToAssign")
    # makeCumulativePlots: whether to make plots of cumulative metrics
    # ...: arguments to be passed to clusterStates(), will be used only if useClusters is NULL
    
    
    # STEP 1: Get state data
    stateData <- getStateData()
    
    
    # STEPS 2-4 are run only if dfPerCapita does not exist
    if (is.null(dfPerCapita)) {
        
        # STEP 2a: Download latest COVID Tracking Project data (if requested)
        if (!is.null(downloadTo)) downloadCOVIDbyState(fileName=downloadTo)
    
        # STEP 2b: Read-in COVID Tracking Project data
        dfRaw <- readCOViDbyState(readFrom, checkFile=compareFile)
        glimpse(dfRaw)
    
        # STEP 3: Process the data so that it includes all requested key variables
        varsFilter <- c("date", "state", "positiveIncrease", "deathIncrease", 
                        "hospitalizedCurrently", "totalTestResultsIncrease"
                        )
        dfFiltered <- processCVData(dfRaw, 
                                    varsKeep=varsFilter, 
                                    varsRename=c(positiveIncrease="cases", 
                                                 deathIncrease="deaths", 
                                                 hospitalizedCurrently="hosp", 
                                                 totalTestResultsIncrease="tests"
                                                 )
                                    )
        glimpse(dfFiltered)
    
        # STEP 4: Convert to per capita
        dfPerCapita <- helperMakePerCapita(dfFiltered, 
                                           mapVars=c("cases"="cpm", "deaths"="dpm", 
                                                     "hosp"="hpm", "tests"="tpm"
                                                     ), 
                                           popData=stateData
                                           )
        glimpse(dfPerCapita)
        
    } else {
        dfRaw <- NULL
        dfFiltered <- NULL
    }
    
    
    # STEP 5: Create the clusters (if they have not been passed)
    if (is.null(useClusters)) {
        # Run the clustering process
        clData <- clusterStates(df=dfPerCapita, hierarchical=hierarchical, returnList=returnList, ...)
        # If hierarchical clusters, cut the tree, otherwise use the output object directly
        if (hierarchical) {
            useClusters <- cutree(clData, k=kCut)
        } else {
            useClusters <- clData$objCluster$cluster
        }
        # If requested, manually assign clusters to the cluster for another state
        for (xNum in seq_len(length(reAssignState))) {
            useClusters[names(reAssignState)[xNum]] <- useClusters[reAssignState[[xNum]]]
        }

    }
    
    
    # STEP 6: Create the cluster assessments
    plotData <- assessClusters(useClusters, 
                               dfState=stateData, 
                               dfBurden=dfPerCapita,
                               thruLabel=thruLabel,
                               plotsTogether=TRUE
                               )

    
    # STEP 7: Plot the consolidated metrics
    subT <- "Cases: new cases, Deaths: new deaths, Hosp: total in hospital (not new), Tests: new tests"
    consolidatedPlotData <- plotConsolidatedMetrics(plotData, 
                                                    varMain=c("state", "cluster", "date", "pop",
                                                              "cases", "deaths", "hosp", "tests"
                                                              ), 
                                                    subT=subT, 
                                                    nrowPlot2=2
                                                    )
    
    # STEP 8: Create cumulative metrics if requested
    if (makeCumulativePlots) {
        consPos <- consolidatedPlotData %>%
            ungroup() %>%
            select(state, cluster, date, name, vpm7) %>%
            arrange(state, cluster, date, name) %>%
            pivot_wider(-vpm7, names_from="name", values_from="vpm7") %>%
            mutate(pctpos=cases/tests) %>%
            pivot_longer(-c(state, cluster, date), values_to="vpm7") %>%
            filter(!is.na(vpm7))
        clCum <- makeCumulative(consPos)
        plotCumulativeData(clCum, 
                           keyMetricp2="", 
                           flagsp2="", 
                           makep1=TRUE, 
                           makep2=FALSE
                           )
        plotCumulativeData(clCum, 
                           keyMetricp2="deaths", 
                           flagsp2=findFlagStates(clCum, keyMetricVal = "deaths")
                           )
        plotCumulativeData(clCum, 
                           keyMetricp2="cases", 
                           flagsp2=findFlagStates(clCum, keyMetricVal = "cases")
                           )
        plotCumulativeData(clCum, 
                           keyMetricp2="tests", 
                           flagsp2=findFlagStates(clCum, keyMetricVal = "tests")
                           )
    } else {
        clCum <- NULL
    }
    
    
    # STEP 9: Return a list of the key data
    list(stateData=stateData, 
         dfRaw=dfRaw, 
         dfFiltered=dfFiltered, 
         dfPerCapita=dfPerCapita, 
         useClusters=useClusters, 
         plotData=plotData, 
         consolidatedPlotData=consolidatedPlotData, 
         clCum=clCum
         )
    
    
}


# Test function for hierarchical clustering with Vermont reassigned to New Hampshire
test_hier5 <- readRunCOVIDTrackingProject(thruLabel="Aug 20, 2020", 
                                          readFrom="./RInputFiles/Coronavirus/CV_downloaded_200820.csv", 
                                          hierarchical=TRUE,
                                          kCut=6, 
                                          reAssignState=list("VT"="NH"), 
                                          minShape=3, 
                                          ratioDeathvsCase = 5, 
                                          ratioTotalvsShape = 0.5, 
                                          minDeath=100, 
                                          minCase=10000
                                          )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   state = col_character(),
##   dataQualityGrade = col_character(),
##   lastUpdateEt = col_character(),
##   dateModified = col_datetime(format = ""),
##   checkTimeEt = col_character(),
##   dateChecked = col_datetime(format = ""),
##   hash = col_character(),
##   grade = col_logical()
## )
## See spec(...) for full column specifications.
## 
## File is unique by state and date
## 
## 
## Overall control totals in file:
## # A tibble: 1 x 3
##   positiveIncrease deathIncrease hospitalizedCurrently
##              <dbl>         <dbl>                 <dbl>
## 1          5546056        166127               6486294

## Observations: 9,449
## Variables: 53
## $ date                        <date> 2020-08-20, 2020-08-20, 2020-08-20, 20...
## $ state                       <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO...
## $ positive                    <dbl> 5332, 112449, 54765, 0, 196280, 644751,...
## $ negative                    <dbl> 307315, 784330, 593744, 1514, 922163, 9...
## $ pending                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ hospitalizedCurrently       <dbl> 51, 1105, 499, NA, 1070, 6212, 238, 47,...
## $ hospitalizedCumulative      <dbl> NA, 13330, 3790, NA, 21143, NA, 6784, 1...
## $ inIcuCurrently              <dbl> NA, NA, NA, NA, 388, 1707, NA, NA, 26, ...
## $ inIcuCumulative             <dbl> NA, 1348, NA, NA, NA, NA, NA, NA, NA, N...
## $ onVentilatorCurrently       <dbl> 6, NA, 108, NA, 233, NA, NA, NA, 12, NA...
## $ onVentilatorCumulative      <dbl> NA, 734, 488, NA, NA, NA, NA, NA, NA, N...
## $ recovered                   <dbl> 1513, 44684, 48458, NA, 28471, NA, 5759...
## $ dataQualityGrade            <chr> "A", "B", "A", "C", "A+", "B", "A", "B"...
## $ lastUpdateEt                <chr> "8/20/2020 0:00", "8/20/2020 11:00", "8...
## $ dateModified                <dttm> 2020-08-20 00:00:00, 2020-08-20 11:00:...
## $ checkTimeEt                 <chr> "8/19/2020 20:00", "8/20/2020 7:00", "8...
## $ death                       <dbl> 29, 1974, 641, 0, 4684, 11686, 1800, 44...
## $ hospitalized                <dbl> NA, 13330, 3790, NA, 21143, NA, 6784, 1...
## $ dateChecked                 <dttm> 2020-08-20 00:00:00, 2020-08-20 11:00:...
## $ totalTestsViral             <dbl> 312647, 891813, 648509, NA, 1116897, 10...
## $ positiveTestsViral          <dbl> 4970, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ negativeTestsViral          <dbl> 307360, NA, 593744, NA, NA, NA, NA, NA,...
## $ positiveCasesViral          <dbl> 5332, 107483, 54765, 0, 194734, 644751,...
## $ deathConfirmed              <dbl> 29, 1905, NA, NA, 4429, NA, NA, 3572, N...
## $ deathProbable               <dbl> NA, 69, NA, NA, 255, NA, NA, 886, NA, 6...
## $ totalTestEncountersViral    <dbl> NA, NA, NA, NA, NA, NA, 895207, NA, NA,...
## $ totalTestsPeopleViral       <dbl> NA, NA, NA, 1514, NA, NA, 645170, NA, 2...
## $ totalTestsAntibody          <dbl> NA, NA, NA, NA, 255456, NA, 150931, NA,...
## $ positiveTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 10406, NA, NA, ...
## $ negativeTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 140525, NA, NA,...
## $ totalTestsPeopleAntibody    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ negativeTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsPeopleAntigen     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntigen  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsAntigen           <dbl> NA, NA, 10358, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsAntigen        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ fips                        <dbl> 2, 1, 5, 60, 4, 6, 8, 9, 11, 10, 12, 13...
## $ positiveIncrease            <dbl> 85, 971, 549, 0, 723, 5920, 270, 118, 5...
## $ negativeIncrease            <dbl> 1713, 10462, 6680, 0, 6481, 81363, 4657...
## $ total                       <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ totalTestResults            <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ totalTestResultsIncrease    <dbl> 1798, 11433, 7229, 0, 7204, 87283, 7348...
## $ posNeg                      <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ deathIncrease               <dbl> 0, 30, 10, 0, 50, 163, 12, 1, 1, 0, 119...
## $ hospitalizedIncrease        <dbl> 0, 250, 47, 0, 123, 0, 3, 72, 0, 0, 450...
## $ hash                        <chr> "c83a1d575a597788adccbe170950b8d197754b...
## $ commercialScore             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeRegularScore        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ positiveScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ score                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ grade                       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## 
## 
## Control totals - note that validState other than TRUE will be discarded
## 
## # A tibble: 2 x 6
##   validState   cases deaths  hosp    tests     n
##   <lgl>        <dbl>  <dbl> <dbl>    <dbl> <dbl>
## 1 FALSE        29761    385    NA   392743   790
## 2 TRUE       5516295 165742    NA 69638073  8659
## Observations: 8,659
## Variables: 6
## $ date   <date> 2020-08-20, 2020-08-20, 2020-08-20, 2020-08-20, 2020-08-20,...
## $ state  <chr> "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", ...
## $ cases  <dbl> 85, 971, 549, 723, 5920, 270, 118, 55, 75, 4555, 2759, 235, ...
## $ deaths <dbl> 0, 30, 10, 50, 163, 12, 1, 1, 0, 119, 55, 3, 8, 9, 27, 11, 0...
## $ hosp   <dbl> 51, 1105, 499, 1070, 6212, 238, 47, 78, 40, 5067, 2506, 187,...
## $ tests  <dbl> 1798, 11433, 7229, 7204, 87283, 7348, 12420, 2326, 2087, 295...
## Observations: 8,659
## Variables: 14
## $ date   <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01-26,...
## $ state  <chr> "WA", "WA", "WA", "WA", "WA", "WA", "WA", "WA", "WA", "WA", ...
## $ cases  <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2, 1, 1, 0, 3, 1, 1, 0, 0, ...
## $ deaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hosp   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tests  <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2, 1, 1, 0, 3, 1, 1, 0, 0, ...
## $ cpm    <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.000...
## $ dpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm    <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.000...
## $ cpm7   <dbl> NA, NA, NA, 0.00000000, 0.01992331, 0.01992331, 0.01992331, ...
## $ dpm7   <dbl> NA, NA, NA, 0.00000000, 0.00000000, 0.00000000, 0.00000000, ...
## $ hpm7   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm7   <dbl> NA, NA, NA, 0.00000000, 0.01992331, 0.01992331, 0.01992331, ...

## 
## Recency is defined as 2020-07-22 through current
## 
## Recency is defined as 2020-07-22 through current

# Test function for k-means clustering using the per capita data file previously created
test_km5 <- readRunCOVIDTrackingProject(thruLabel="Aug 20, 2020", 
                                        dfPerCapita=test_hier5$dfPerCapita,
                                        hierarchical=FALSE,
                                        makeCumulativePlots=FALSE,
                                        minShape=3, 
                                        ratioDeathvsCase = 5, 
                                        ratioTotalvsShape = 0.5, 
                                        minDeath=100, 
                                        minCase=10000, 
                                        nCenters=5,
                                        testCenters=1:10, 
                                        iter.max=20,
                                        nstart=10, 
                                        seed=2008261400
                                        )

## 
## Cluster means and counts
##                 1     2    3     4    5
## .           10.00 14.00 9.00 14.00 4.00
## totalCases   0.28  0.86 0.71  0.50 0.78
## totalDeaths  0.41  1.42 2.73  0.99 5.32
## cases_3      0.03  0.01 0.05  0.02 0.10
## deaths_3     0.27  0.08 0.13  0.06 0.15
## cases_4      0.07  0.06 0.26  0.12 0.50
## deaths_4     1.27  0.63 1.73  1.00 2.58
## cases_5      0.06  0.07 0.24  0.19 0.23
## deaths_5     0.69  0.66 1.77  1.50 1.59
## cases_6      0.08  0.18 0.11  0.15 0.07
## deaths_6     0.42  0.61 0.70  1.01 0.44
## cases_7      0.23  0.44 0.21  0.28 0.06
## deaths_7     0.70  1.60 0.42  0.81 0.18
## cases_8      0.16  0.22 0.14  0.19 0.04
## deaths_8     0.70  1.38 0.25  0.61 0.06

## 
## Recency is defined as 2020-07-22 through current
## 
## Recency is defined as 2020-07-22 through current

# Run in a session that has access to the old data files
# identical(test_hier5$useClusters, clustVec)  # TRUE
# identical(test_km5$useClusters, testCluster_km5$objCluster$cluster)  # TRUE

Cluster files produced using the new functions and the existing August 20, 2020 data are identical to ‘clustVec’ and ‘testCluster_km5’ produced in the _v001 code.

Further, new data is downloaded that is current through September 30, 2020, and the test_hier5 clusters are used for analysis:

# Test function for hierarchical clustering with Vermont reassigned to New Hampshire
locDownload <- "./RInputFiles/Coronavirus/CV_downloaded_201001.csv"
test_hier5_201001 <- readRunCOVIDTrackingProject(thruLabel="Sep 30, 2020", 
                                                 readFrom=locDownload, 
                                                 compareFile=test_hier5$dfRaw,
                                                 useClusters=test_hier5$useClusters
                                                 )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   state = col_character(),
##   dataQualityGrade = col_character(),
##   lastUpdateEt = col_character(),
##   dateModified = col_datetime(format = ""),
##   checkTimeEt = col_character(),
##   dateChecked = col_datetime(format = ""),
##   fips = col_character(),
##   totalTestResultsSource = col_character(),
##   hash = col_character(),
##   grade = col_logical()
## )
## See spec(...) for full column specifications.
## 
## File is unique by state and date
## 
## 
## Overall control totals in file:
## # A tibble: 1 x 3
##   positiveIncrease deathIncrease hospitalizedCurrently
##              <dbl>         <dbl>                 <dbl>
## 1          7198509        198929               7830770
## 
## *** COMPARISONS TO REFERENCE FILE: compareFile
## 
## Checkin for similarity of: column names
## In reference but not in current: 
## In current but not in reference: totalTestResultsSource
## 
## Checkin for similarity of: states
## In reference but not in current: 
## In current but not in reference: 
## 
## Checkin for similarity of: dates
## In reference but not in current: 
## In current but not in reference: 2020-09-30 2020-09-29 2020-09-28 2020-09-27 2020-09-26 2020-09-25 2020-09-24 2020-09-23 2020-09-22 2020-09-21 2020-09-20 2020-09-19 2020-09-18 2020-09-17 2020-09-16 2020-09-15 2020-09-14 2020-09-13 2020-09-12 2020-09-11 2020-09-10 2020-09-09 2020-09-08 2020-09-07 2020-09-06 2020-09-05 2020-09-04 2020-09-03 2020-09-02 2020-09-01 2020-08-31 2020-08-30 2020-08-29 2020-08-28 2020-08-27 2020-08-26 2020-08-25 2020-08-24 2020-08-23 2020-08-22 2020-08-21
## 
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("date", "name")
##          date                  name newValue oldValue
## 1  2020-02-15      positiveIncrease        0        7
## 2  2020-02-16      positiveIncrease        0        7
## 3  2020-02-17      positiveIncrease        0       15
## 4  2020-02-18      positiveIncrease        0        9
## 5  2020-02-19      positiveIncrease        0       10
## 6  2020-02-20      positiveIncrease        0       13
## 7  2020-02-21      positiveIncrease        0       11
## 8  2020-02-22      positiveIncrease        0       13
## 9  2020-02-23      positiveIncrease        0       16
## 10 2020-02-24      positiveIncrease        0       26
## 11 2020-02-25      positiveIncrease        0       31
## 12 2020-02-26      positiveIncrease        0       29
## 13 2020-02-27      positiveIncrease        0       27
## 14 2020-02-28      positiveIncrease        0       40
## 15 2020-02-29      positiveIncrease       18       24
## 16 2020-03-01      positiveIncrease       16       78
## 17 2020-03-02      positiveIncrease       44       82
## 18 2020-03-03      positiveIncrease       48      100
## 19 2020-03-04      positiveIncrease       62      110
## 20 2020-03-05      positiveIncrease      103      151
## 21 2020-03-06      positiveIncrease      108      137
## 22 2020-03-07      positiveIncrease      175      217
## 23 2020-03-08      positiveIncrease      198      267
## 24 2020-03-09      positiveIncrease      292      367
## 25 2020-03-10      positiveIncrease      387      441
## 26 2020-03-11      positiveIncrease      509      527
## 27 2020-03-13      positiveIncrease     1072     1025
## 28 2020-03-15      positiveIncrease     1291     1251
## 29 2020-03-16      positiveIncrease     1739     1560
## 30 2020-03-17      positiveIncrease     2588     3613
## 31 2020-03-18      positiveIncrease     3089     3171
## 32 2020-03-20      positiveIncrease     6147     6255
## 33 2020-03-21      positiveIncrease     6793     6885
## 34 2020-03-22      positiveIncrease     9125     9259
## 35 2020-03-24      positiveIncrease    10769    10632
## 36 2020-04-03 hospitalizedCurrently    25729    25472
## 37 2020-04-29      positiveIncrease    26168    26641
## 38 2020-04-30      positiveIncrease    29975    29568
## 39 2020-05-08      positiveIncrease    27222    27605
## 40 2020-05-09      positiveIncrease    25218    24810
## 41 2020-05-10      positiveIncrease    21055    21504
## 42 2020-05-12      positiveIncrease    22890    22663
## 43 2020-05-26      positiveIncrease    16825    16629
## 44 2020-06-08      positiveIncrease    17209    17012
## 45 2020-06-19      positiveIncrease    31471    31046
## 46 2020-06-21      positiveIncrease    27928    27284
## 47 2020-06-23      positiveIncrease    33447    33021
## 48 2020-06-24      positiveIncrease    39075    38684
## 49 2020-06-25      positiveIncrease    39637    39072
## 50 2020-06-27      positiveIncrease    43164    43783
## 51 2020-06-29      positiveIncrease    39813    39175
## 52 2020-07-26      positiveIncrease    61000    61713
## 53 2020-07-28      positiveIncrease    59003    56229
## 54 2020-07-29      positiveIncrease    64408    66969
## 55 2020-08-02      positiveIncrease    46812    48266
## 56 2020-08-09      positiveIncrease    50627    51365
## 57 2020-08-16      positiveIncrease    42487    43083
## 58 2020-08-20      positiveIncrease    43758    43245
## 59 2020-08-20         deathIncrease     1134     1117
## Joining, by = c("date", "name")
## 
## 
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("state", "name")

##   state                  name newValue oldValue
## 1    WA      positiveIncrease    70973    68687
## 2    WI hospitalizedCurrently    45951    51401
## Observations: 11,802
## Variables: 54
## $ date                        <date> 2020-09-30, 2020-09-30, 2020-09-30, 20...
## $ state                       <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO...
## $ positive                    <dbl> 8780, 154701, 83697, 0, 218507, 810625,...
## $ negative                    <dbl> 448427, 994475, 949107, 1571, 1243641, ...
## $ pending                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestResults            <dbl> 457207, 1132039, 1029717, 1571, 1457575...
## $ hospitalizedCurrently       <dbl> 53, 776, 484, NA, 560, 3267, 268, 104, ...
## $ hospitalizedCumulative      <dbl> NA, 17257, 5354, NA, 22119, NA, 7558, 1...
## $ inIcuCurrently              <dbl> NA, NA, 218, NA, 115, 830, NA, NA, 28, ...
## $ inIcuCumulative             <dbl> NA, 1815, NA, NA, NA, NA, NA, NA, NA, N...
## $ onVentilatorCurrently       <dbl> 7, NA, 93, NA, 55, NA, NA, NA, 9, NA, N...
## $ onVentilatorCumulative      <dbl> NA, 1025, 676, NA, NA, NA, NA, NA, NA, ...
## $ recovered                   <dbl> 4555, 67948, 75312, NA, 35261, NA, 6500...
## $ dataQualityGrade            <chr> "A", "A", "A+", "D", "A+", "B", "A", "B...
## $ lastUpdateEt                <chr> "9/30/2020 03:59", "9/30/2020 11:00", "...
## $ dateModified                <dttm> 2020-09-30 03:59:00, 2020-09-30 11:00:...
## $ checkTimeEt                 <chr> "09/29 23:59", "09/30 07:00", "09/29 20...
## $ death                       <dbl> 56, 2540, 1369, 0, 5650, 15792, 1952, 4...
## $ hospitalized                <dbl> NA, 17257, 5354, NA, 22119, NA, 7558, 1...
## $ dateChecked                 <dttm> 2020-09-30 03:59:00, 2020-09-30 11:00:...
## $ totalTestsViral             <dbl> 457207, 1132039, 1029717, 1571, NA, 147...
## $ positiveTestsViral          <dbl> 7807, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ negativeTestsViral          <dbl> 449109, NA, 949107, NA, NA, NA, NA, NA,...
## $ positiveCasesViral          <dbl> 8780, 137564, 80610, 0, 213934, 810625,...
## $ deathConfirmed              <dbl> 56, 2399, 1223, NA, 5382, NA, NA, 3610,...
## $ deathProbable               <dbl> NA, 141, 146, NA, 268, NA, NA, 898, NA,...
## $ totalTestEncountersViral    <dbl> NA, NA, NA, NA, NA, NA, 1341180, NA, 38...
## $ totalTestsPeopleViral       <dbl> NA, NA, NA, NA, 1457575, NA, 902242, NA...
## $ totalTestsAntibody          <dbl> NA, NA, NA, NA, 293098, NA, 167989, NA,...
## $ positiveTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 11733, NA, NA, ...
## $ negativeTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 156256, NA, NA,...
## $ totalTestsPeopleAntibody    <dbl> NA, 58393, NA, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ negativeTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsPeopleAntigen     <dbl> NA, NA, 11700, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsPeopleAntigen  <dbl> NA, NA, 3383, NA, NA, NA, NA, NA, NA, N...
## $ totalTestsAntigen           <dbl> NA, NA, 21856, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsAntigen        <dbl> NA, NA, 3300, NA, NA, NA, NA, NA, NA, N...
## $ fips                        <chr> "02", "01", "05", "60", "04", "06", "08...
## $ positiveIncrease            <dbl> 106, 1147, 942, 0, 323, 3200, 535, 221,...
## $ negativeIncrease            <dbl> 6045, 11312, 21205, 0, 3735, 88457, 674...
## $ total                       <dbl> 457207, 1149176, 1032804, 1571, 1462148...
## $ totalTestResultsSource      <chr> "posNeg", "posNeg", "posNeg", "posNeg",...
## $ totalTestResultsIncrease    <dbl> 6151, 12327, 21812, 0, 4047, 91657, 145...
## $ posNeg                      <dbl> 457207, 1149176, 1032804, 1571, 1462148...
## $ deathIncrease               <dbl> 0, 23, 19, 0, 18, 152, 7, 3, 1, 1, 175,...
## $ hospitalizedIncrease        <dbl> 0, 75, 0, 0, 72, 0, 28, 0, 0, 0, 253, 1...
## $ hash                        <chr> "1716aba4b705ff74814829d25487c6e57d2712...
## $ commercialScore             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeRegularScore        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ positiveScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ score                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ grade                       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## 
## 
## Control totals - note that validState other than TRUE will be discarded
## 
## # A tibble: 2 x 6
##   validState   cases deaths  hosp     tests     n
##   <lgl>        <dbl>  <dbl> <dbl>     <dbl> <dbl>
## 1 FALSE        52622    732    NA    442114   995
## 2 TRUE       7145887 198197    NA 103496519 10807
## Observations: 10,807
## Variables: 6
## $ date   <date> 2020-09-30, 2020-09-30, 2020-09-30, 2020-09-30, 2020-09-30,...
## $ state  <chr> "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", ...
## $ cases  <dbl> 106, 1147, 942, 323, 3200, 535, 221, 26, 82, 1948, 1720, 87,...
## $ deaths <dbl> 0, 23, 19, 18, 152, 7, 3, 1, 1, 175, 27, 0, 16, 4, 35, 20, 4...
## $ hosp   <dbl> 53, 776, 484, 560, 3267, 268, 104, 95, 72, 2097, 1896, 147, ...
## $ tests  <dbl> 6151, 12327, 21812, 4047, 91657, 14538, 12401, 1464, 1074, 1...
## Observations: 10,807
## Variables: 14
## $ date   <date> 2020-01-22, 2020-01-22, 2020-01-23, 2020-01-23, 2020-01-24,...
## $ state  <chr> "MA", "WA", "MA", "WA", "MA", "WA", "MA", "WA", "MA", "WA", ...
## $ cases  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ deaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hosp   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tests  <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...
## $ cpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm    <dbl> 0.0000000, 0.0000000, 0.1471796, 0.0000000, 0.0000000, 0.000...
## $ cpm7   <dbl> NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm7   <dbl> NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm7   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm7   <dbl> NA, NA, NA, NA, NA, NA, 0.04205130, 0.00000000, 0.06307695, ...

## 
## Recency is defined as 2020-09-01 through current
## 
## Recency is defined as 2020-09-01 through current

Segments appear to be on trend with previous analysis, with what was previously called “late pandemic” having peaked and the segments that had smaller deaths per million showing higher percentage increases.

USA Facts

The second section is for analysis of data from USA Facts. This site contains county-level data for the US focused on coronavirus cases and deaths. Data are provided with one row per county and one column for each date, and are thus unique by county. There are differences in state and US totals for coronavirus cases and deaths when reported by COVID Tracking Project and USA Facts, though trends, timing, and relative magnitudes are generally in agreement between the two sources.

Key Functions (USA Facts)

Functions for reading and analyzing data from USA Facts include:

# Function to plot per capita data by county
plotCountyPerCapita <- function(dfDisease, 
                                burdenVar,
                                useDate,
                                plotTitle="",
                                inclStates=NULL,
                                dfCounty=filter(pop_usafacts, countyFIPS!=0), 
                                popVar="population", 
                                highColor="darkblue", 
                                maxPerCap=NULL,
                                printPlot=TRUE, 
                                returnData=!printPlot
                                ) {
    
    # FUNCTION ARGUMENTS
    # dfDisease: file containing disease data
    # burdenVar: variable for disease burden (cumulative) on date
    # useDate: date for the analysis
    # plotTitle: title for the plot
    # inclStates: states to include (NULL means include all)
    # dfCounty: data for county-level population
    # popVar: variable for population in the dfCounty file
    # maxPerCap: the maximum amount to be used for per capita (everything at or above shaded the same)
    # highColor: the color to be used for high disease burden
    # printPlot: boolean, whether to print the plot
    # returnData: boolean, whether to return the underlying data (if FALSE, the plot object is returned)
    
    # Create the relevant data frame
    dfData <- dfDisease %>% 
        left_join(dfCounty, by=c("countyFIPS", "state")) %>% 
        filter(date %in% useDate, countyFIPS!=0, population>0) %>% 
        mutate(burden=1000000*get(burdenVar)/get(popVar))

    # Modify inclStates to be every state (if needed due to NULL)
    if (is.null(inclStates)) inclStates <- dfData %>% pull(state) %>% unique() %>% sort()
    
    # Create the relevant plot (this is necessary if printPlot is TRUE or returnData is FALSE)
    if (printPlot | !returnData) {
        p1 <- dfData %>%
            mutate(burden=if(is.null(maxPerCap)) burden else pmin(burden, maxPerCap)) %>%
            select(fips=countyFIPS, burden) %>% 
            usmap::plot_usmap(regions="counties", data=., values="burden", include=inclStates)
        if (is.null(maxPerCap)) 
            p1 <- p1 + scale_fill_continuous(plotTitle, low="white", high=highColor)
        else {
            p1 <- p1 + 
                scale_fill_continuous(plotTitle, low="white", high=highColor, limits=c(0, maxPerCap)) + 
                labs(caption=paste0("Values at/above ", maxPerCap, " plotted as ", maxPerCap))
        }
    }
    
    # Print the plot if requested
    if (printPlot) print(p1)
    
    # Return the data if requested, otherwise return the plot object
    if (returnData) dfData %>% filter(state %in% inclStates) else p1
    
}


# Function to make the cases and deaths by county plot
casesDeathsByCounty <- function(useDate,
                                inclStates=NULL,
                                caseData=pvtCases_20200903, 
                                deathData=pvtDeaths_20200903, 
                                highCaseColor="darkblue", 
                                highDeathColor="darkred", 
                                highCaseAmount=NULL, 
                                highDeathAmount=NULL,
                                mainTitle=NULL
                                ) {
    
    # FUNCTION ARGUMENTS:
    # useDate: the date to be plotted
    # inclStates: the states to be included (NULL means all)
    # caseData: the frame containing the cases date
    # deathData: the frame containing the death data
    # highCaseColor: color for the highest level of cases in a county
    # highDeathColor: color for the highest level of deaths in a county
    # highCaseAmount: cases at/above this level will be the same color (NULL means leave as-is)
    # highDeathAmount: deaths at/above this level will be the same color (NULL means leave as-is)
    # mainTitle: main title for the grid.arrange (NULL means useDate will be used)

    # Convert useDate to date if not already in that format
    if (!("Date") %in% class(useDate)) useDate <- as.Date(useDate)
    
    # Convert inclStates to be all states if not specified
    if (is.null(inclStates)) inclStates <- caseData %>% pull(state) %>% unique() %>% sort()
    
    # Create mainTitle if not passed
    if (is.null(mainTitle)) 
        mainTitle <- paste0("Per capita coronavirus burden as of ", format(useDate, "%B %d, %Y"))
    
    # Create the plot for deaths    
    deathPlot <- plotCountyPerCapita(deathData, 
                                     burdenVar="cumDeaths", 
                                     useDate=useDate, 
                                     plotTitle="Deaths\nper million", 
                                     inclStates=inclStates, 
                                     highColor=highDeathColor, 
                                     maxPerCap=highDeathAmount,
                                     printPlot=FALSE, 
                                     returnData=FALSE
                                     )

    # Create the plot for cases
    casesPlot <- plotCountyPerCapita(caseData, 
                                     burdenVar="cumCases", 
                                     useDate=useDate, 
                                     plotTitle="Cases\nper million", 
                                     inclStates=inclStates, 
                                     highColor=highCaseColor, 
                                     maxPerCap=highCaseAmount,
                                     printPlot=FALSE, 
                                     returnData=FALSE
                                     )

    # Print the combined plot object
    gridExtra::grid.arrange(casesPlot, 
                            deathPlot, 
                            nrow=1, 
                            top=grid::textGrob(mainTitle, 
                                               gp=grid::gpar(fontface=2, fontsize=12),
                                               hjust=0, 
                                               x=0.05
                                               )
                            )

}


# Function to plot evolution of county-level burdens
countyLevelEvolution <- function(dfBurden, 
                                 burdenVar, 
                                 inclStates=NULL, 
                                 topN=10, 
                                 topNDate=NULL, 
                                 printPlot=TRUE, 
                                 returnData=TRUE, 
                                 plotTitle=NULL, 
                                 countyPopFloor=0, 
                                 subT=NULL
                                 ) {
    
    # FUNCTION ARGUMENTS:
    # dfBurden: file containing the relevant per-capita burden data
    # burdenVar: the name of the variable containing the burden per-capita data
    # inclStates: states to be included (default NULL means include all)
    # topN: integer, number of counties to flag as "top"
    # topNDate: the data to use as the topN cutpoint (NULL means most recent)
    # printPlot: boolean, whether to print the plot
    # returnData: boolean, if TRUE return the per-capita data file, otherwise return the plot object
    # plotTitle: title for the plot (NULL means assume from burdenVar)
    # countyPopFloor: floor for county population for the county to be plotted
    # subT: subtitle for the chart (NULL means none)
    
    # Adjust inclStates if NULL
    if (is.null(inclStates)) inclStates <- dfBurden %>% count(state) %>% pull(state)
    
    # Create plotTitle if needed
    if (is.null(plotTitle)) {
        plotTitle <- if(stringr::str_detect(string=stringr::str_to_lower(burdenVar), pattern="deaths")) {
            "Cumulative per-capita deaths by county" 
        } else {
            "Cumulative per-capita cases by county"
        }
    }
    
    # Get all possible dates
    allDates <- dfBurden %>% count(date) %>% pull(date)
    
    # Get the data for the counties in the relevant states (return data only and do not plot)
    perCapData <- plotCountyPerCapita(dfBurden, 
                                      burdenVar=burdenVar, 
                                      useDate=allDates, 
                                      inclStates=inclStates, 
                                      printPlot=FALSE, 
                                      returnData=TRUE
                                      )

    # Get the relevant top-N date and convert to Date if not already of that type
    if (is.null(topNDate)) topNDate <- perCapData %>% pull(date) %>% max()
    if (!("Date" %in% class(topNDate))) topNDate <- as.Date(topNDate)
    
    # Get the top-n counties by burdenVar
    # Top 10 counties hit by FIPS, counting only those that exceed the population floor
    topN <- perCapData %>%
        filter(date==topNDate, population>=countyPopFloor) %>%
        arrange(-burden) %>%
        head(topN) %>%
        pull(countyFIPS)

    # Update perCapData with easy-read county name and bolding instructions
    perCapData <- perCapData %>%
        mutate(county=paste0(str_replace(countyName.x, " County", ""), " (", state, ")"), 
               bold=ifelse(countyFIPS %in% topN, 1, 0)
               ) %>%
        arrange(date)
    
    # Create the plot of all counties with the topN flagged
    # Evolution of per capita deaths by date in the northeast
    p1 <- perCapData %>%
        filter(population>=countyPopFloor) %>%
        ggplot(aes(x=date, y=burden)) + 
        geom_line(aes(group=countyFIPS, color=state, alpha=0.25 + 0.75*bold, size=0.5+0.5*bold)) + 
        scale_alpha_identity() + 
        scale_size_identity() + 
        geom_text(data=~filter(., bold==1, date==max(date)), 
                  aes(x=date+lubridate::days(2), label=paste0(county, ": ", round(burden)), color=state), 
                  size=3, 
                  fontface="bold", 
                  hjust=0
              ) +
        scale_x_date(date_breaks="1 months", date_labels="%m", expand=expand_scale(mult=c(0, 0.4))) + 
        labs(x="", y="Burden per million people", title=plotTitle) + 
        theme(legend.position="bottom")
    
    # Add the subtitle if passed
    if (!is.null(subT)) p1 <- p1 + labs(subtitle=subT)

    # Print the plot if requested
    if (printPlot) print(p1)
    
    # Return the relevant object
    if (returnData) perCapData else p1
    
}



# Function for calling assessClusters() for county-level segments
helperAssessCountyClusters <- function(vecCluster, 
                                       dfPop, 
                                       dfBurden, 
                                       dfPopGeoVar="state", 
                                       dfPopPopVar="population", 
                                       showCum=FALSE,
                                       ...
                                       ) {
    
    # FUNCTION ARGUMENTS:
    # vecCluster: the named cluster vector
    # dfPop: the data frame containing the population data
    # dfBurden: the data frame containing the burden statistics by county and date
    # showCum: boolean, whether to create the cumulative burdens chart
    # ...: other arguments to pass to assessClusters()

    # Run the process    
    plotAssess <- assessClusters(vecCluster, 
                                 dfState=dfPop %>% 
                                     group_by_at(dfPopGeoVar) %>% 
                                     summarize(pop=mean(get(dfPopPopVar))) %>% 
                                     ungroup(), 
                                 dfBurden=select(dfBurden, -population), 
                                 isCounty=TRUE, 
                                 ...
                                 )
    
    # Create the cumulative burdens chart if requested
    if (showCum) {
        p1 <- plotAssess %>%
            select(cluster, date, pop, cases, deaths) %>%
            group_by(cluster, date) %>%
            summarize_if(is.numeric, sum, na.rm=TRUE) %>%
            arrange(date) %>%
            mutate(cpmcum=cumsum(cases)*1000000/pop, dpmcum=cumsum(deaths)*1000000/pop) %>%
            ungroup() %>%
            select(cluster, date, cases=cpmcum, deaths=dpmcum) %>%
            pivot_longer(-c(cluster, date)) %>%
            ggplot(aes(x=date, y=value, color=cluster)) + 
            geom_line(size=1) + 
            geom_text(data=~filter(., date==max(date)), 
                      aes(x=date+lubridate::days(2), label=round(value)), 
                      size=3, 
                      hjust=0
                      ) +
            labs(x="", title="Cumulative burden per million people by segment", y="") +
            facet_wrap(~c("cases"="Cases per million", "deaths"="Deaths per million")[name], scales="free_y") +
            scale_x_date(date_breaks="1 months", date_labels="%b", expand=expand_scale(c(0, 0.1)))
        print(p1)
    }
    
    # Return the plot object
    plotAssess
    
}



# Helper function to make the overall cluster summary statistics
helperClusterSummaryPlots <- function(dfState, 
                                      dfPlot,
                                      showMap, 
                                      clusterPlotsTogether,
                                      weightedMean=TRUE,
                                      mapLevel="states"
                                      ) {
    
    # FUNCTION ARGUMENTS:
    # dfState: contains the state/county-level data
    # dfPlot: contains the joined data for plotting
    # showMap: boolean for whether to create a map (if FALSE, segment membership counts are shown instead)
    # clusterPlotsTogether: boolean, whether to put all four plots on the same page
    # weightedMean: boolean, whether to create weighted mean by segment (if FALSE, median by segment is taken)
    # mapLevel: the level to be used on the map
    
    # Reorder the cluster levels in dfState to match dfPlot
    # Convert factor order to match dfPlot (can be reordered by argument to the calling function)
    dfState <- dfState %>%
        mutate(cluster=factor(cluster, levels=levels(dfPlot$cluster)))
    
    # Plot the segments on a map or show segment membership
    if (showMap) {
        if (mapLevel=="counties") {
            dfMap <- dfState %>%
                mutate(fips=stringr::str_pad(state, width=5, side="left", pad="0")) %>%
                select(fips, cluster)
        } else {
            dfMap <- dfState
        }
        # Create the map
        p1 <- usmap::plot_usmap(regions=mapLevel, data=dfMap, values="cluster") + 
            scale_fill_discrete("cluster") + 
            theme(legend.position="right")
    } else {
        p1 <- dfState %>%
            count(cluster) %>%
            ggplot(aes(x=fct_rev(cluster), y=n)) + 
            geom_col(aes(fill=cluster)) +
            geom_text(aes(y=n/2, label=n)) +
            coord_flip() + 
            labs(x="", y="# Counties", title="Membership by segment")
    }
    
    # Plot the population totals by segment
    # Show population totals by cluster
    p2 <- dfState %>%
        group_by(cluster) %>%
        summarize(pop=sum(pop)/1000000) %>%
        ggplot(aes(x=fct_rev(cluster), y=pop)) + 
        geom_col(aes(fill=cluster)) + 
        geom_text(aes(y=pop/2, label=round(pop))) + 
        labs(y="2015 population (millions)", x="Cluster", title="Population by cluster (millions)") +
        coord_flip()
    
    # Plot the rolling 7-day mean daily disease burden by cluster
    # Create the p3Data to be either median of all elements in cluster or weighted mean
    p3 <- dfPlot %>%        
        select(date, cluster, cases=cpm7, deaths=dpm7, pop) %>%
        pivot_longer((-c(date, cluster, pop))) %>%
        filter(!is.na(value)) %>%
        group_by(date, cluster, name) %>%
        summarize(mdnValue=median(value), wtdValue=sum(pop*value)/sum(pop)) %>%
        ggplot(aes(x=date, y=if(weightedMean) wtdValue else mdnValue)) +
        geom_line(aes(group=cluster, color=cluster)) +
        facet_wrap(~name, scales="free_y") +
        labs(x="",
             y="Rolling 7-day mean, per million",
             title="Rolling 7-day mean daily disease burden, per million",
             subtitle=paste0(if(weightedMean) "Weighted mean" else "Median", 
                             " per day for states assigned to cluster"
                             )
             ) + 
        scale_x_date(date_breaks="1 months", date_labels="%b")
    
    # Plot the total cases and total deaths by cluster
    p4 <- dfPlot %>%
        group_by(cluster) %>%
        summarize(cases=sum(cases), deaths=sum(deaths)) %>%
        pivot_longer(-cluster) %>%
        ggplot(aes(x=fct_rev(cluster), y=value/1000)) + 
        geom_col(aes(fill=cluster)) + 
        geom_text(aes(y=value/2000, label=round(value/1000))) +
        coord_flip() + 
        facet_wrap(~varMapper[name], scales="free_x") + 
        labs(x="Cluster", y="Burden (000s)", title="Total cases and deaths by segment")

    # Place the plots together if plotsTogether is TRUE, otherwise just print
    if (isTRUE(clusterPlotsTogether)) {
        gridExtra::grid.arrange(p1, p2, p3, p4, nrow=2, ncol=2)
    } else {
        print(p1); print(p2); print(p3); print(p4)
    }
    
}



# Helper function to make total and per capita by state (calls its own helper function)
helperCallTotalPerCapita <- function(dfPlot, 
                                     thruLabel
                                     ) {
    
    # FUNCTION ARGUMENTS:
    # dfPlot: the plotting data frame
    # thruLabel: the date that data are through
    
    # Plot total cases and total deaths by state, colored by cluster
    helperBarDeathsCases(dfPlot, 
                         numVars=c("cases", "deaths"), 
                         title=paste0("Coronavirus impact by state through ", thruLabel), 
                         xVar=c("state"), 
                         fillVar=c("cluster")
                         )
    
    # Plot cases per million and deaths per million by state, colored by cluster
    helperBarDeathsCases(dfPlot, 
                         numVars=c("cpm", "dpm"), 
                         title=paste0("Coronavirus impact by state through ", thruLabel), 
                         xVar=c("state"), 
                         fillVar=c("cluster")
                         )
    
}



# Helper function to make recent vs. total plots
helperCallRecentvsTotal <- function(dfPlot, 
                                    thruLabel, 
                                    labelPoints, 
                                    recentTotalTogether
                                    ) {
    
    # FUNCTION ARGUMENTS:
    # dfPlot: the plotting data frame
    # thruLabel: the date that data are through
    # labelPoints: boolean, whether to label the individual points
    # recentTotalTogether: boolean, whether to put these plots together on one page
    
    # Plot last-30 vs total for cases per million by state, colored by cluster
    p7 <- helperRecentvsTotal(dfPlot, 
                              xVar="cpm", 
                              yVar="newcpm", 
                              title=paste0("Coronavirus burden through ", thruLabel), 
                              labelPlot=labelPoints,
                              printPlot=FALSE
                              )
    
    # Plot last-30 vs total for deaths per million by state, colored by cluster
    p8 <- helperRecentvsTotal(dfPlot, 
                              xVar="dpm", 
                              yVar="newdpm", 
                              title=paste0("Coronavirus burden through ", thruLabel), 
                              labelPlot=labelPoints,
                              printPlot=FALSE
                              )
    
    # Print the plots either as a single page or separately
    if (isTRUE(recentTotalTogether)) {
        gridExtra::grid.arrange(p7, p8, nrow=1)
    } else {
        print(p7); print(p8)
    }    
    
}



# Helper function to create total vs. elements plots
helperCallTotalvsElements <- function(dfPlot, 
                                      aggAndTotal, 
                                      clusterAggTogether,
                                      ...
                                      ) {

    # FUNCTION ARGUMENTS:
    # dfPlot: the plotting data frame
    # aggAndTotal: boolean, should each individual line be plotted (if FALSE an 80% CI is plotted instead)
    # clusterAggTogether: boolean, whether to print the plots all on a single page
    # ...: any other arguments to pass to helperTotalvsElements (especially pctRibbon or aggFunc)
    
    # Plot the cases per million on a free y-scale and a fixed y-scale
    p9 <- helperTotalvsElements(dfPlot, 
                                keyVar="cpm7", 
                                aggAndTotal=aggAndTotal,
                                title="Cases per million, 7-day rolling mean", 
                                printPlot=FALSE, 
                                ...
                                )
    p10 <- helperTotalvsElements(dfPlot, 
                                 keyVar="cpm7", 
                                 aggAndTotal=aggAndTotal,
                                 title="Cases per million, 7-day rolling mean", 
                                 facetScales="fixed", 
                                 printPlot=FALSE, 
                                 ...
                                 )
    
    # Plot the deaths per million on a free y-scale and a fixed y-scale
    p11 <- helperTotalvsElements(dfPlot, 
                                 keyVar="dpm7", 
                                 aggAndTotal=aggAndTotal,
                                 title="Deaths per million, 7-day rolling mean", 
                                 printPlot=FALSE, 
                                 ...
                                 )
    p12 <- helperTotalvsElements(dfPlot, 
                                 keyVar="dpm7", 
                                 aggAndTotal=aggAndTotal,
                                 title="Deaths per million, 7-day rolling mean", 
                                 facetScales="fixed", 
                                 printPlot=FALSE, 
                                 ...
                                 )
    
    if (isTRUE(clusterAggTogether)) {
        gridExtra::grid.arrange(p9, p11, nrow=1)
        gridExtra::grid.arrange(p10, p12, nrow=1)
    } else {
        print(p9); print(p10); print(p11); print(p12)
    }
    
}



# Updated function for cluster assessment
# Main function creates the required data and calls helper functions
# 1. Call helper to create overall cluster summary
# 2. Call helper function for totals and per capita by state
# 3. Call helper for recent vs. overall cases
# 4. Call helper for evolution of segment aggregate and individual components
assessClusters <- function(clusters, 
                           dfState=stateData, 
                           dfBurden=cvFilteredPerCapita,
                           thruLabel="Aug 20, 2020",
                           isCounty=FALSE,
                           plotsTogether=FALSE, 
                           clusterPlotsTogether=plotsTogether,
                           recentTotalTogether=plotsTogether, 
                           clusterAggTogether=plotsTogether, 
                           makeSummaryPlots=TRUE, 
                           makeTotalvsPerCapitaPlots=!isCounty, 
                           makeRecentvsTotalPlots=TRUE, 
                           makeTotalvsElementPlots=TRUE, 
                           showMap=!isCounty, 
                           orderCluster=FALSE
                           ) {
    
    # FUNCTION ARGUMENTS:
    # clusters: the named vector containing the clusters by state
    # dfState: the file containing the states and populations
    # dfBurden: the data containing the relevant per capita burden statistics by state-date
    # thruLabel: label for plots for 'data through'
    # isCounty: boolean, is this a plot of county-level data that have been named 'state'?
    #           FALSE means that it is normal state-level data
    # plotsTogether: boolean, should plots be consolidated on fewer pages?
    # clusterPlotsTogether: boolean, should plots p1-p4 be consolidated?
    # recentTotalTogether: boolean, should recent total plots p7-p8 be consolidated?
    # clusterAggTogether: boolean, should aggregate plots p9/p11 and p10/p12 be consolidated?
    # makeSummaryPlots: boolean, should the summary plots be made?
    # makeTotalvsPerCapitaPlots: boolean, should the total and per capita plots be produced?
    # makeRecentvsTotalPlots: boolean, should the recent vs. total plots be created?
    # makeTotalvsElementPlots: boolean, should the total vs. element plots be created?
    # showMap: boolean, whether to create a map colored by cluster (will show segment counts otherwise)
    # orderCluster: boolean, whether to order the clusters based on disease burden
    # ...: any additional arguments passed from a calling function
    #      most common would be orderCluster=TRUE, a request for the clusters to be ordered by disease burden
    
    # Attach the clusters to the state population data
    dfState <- as.data.frame(clusters) %>%
        set_names("cluster") %>%
        rownames_to_column("state") %>%
        inner_join(dfState, by="state") %>%
        mutate(cluster=factor(cluster))
    
    # Plot the rolling 7-day mean dialy disease burden by cluster
    dfPlot <- dfState %>%
        inner_join(dfBurden, by="state") %>%
        tibble::as_tibble()
    
    # Reorder the clusters if requested
    if (orderCluster) {
        dfPlot <- changeOrderLabel(dfPlot, grpVars="state")
    }
    
    # Call the helper function to make the overall summary statistics
    if (makeSummaryPlots) {
        helperClusterSummaryPlots(dfState=dfState, 
                                  dfPlot=dfPlot, 
                                  showMap=showMap, 
                                  clusterPlotsTogether=clusterPlotsTogether, 
                                  mapLevel=if(isCounty) "counties" else "states"
                                  )
    }

    # These are primarily useful for state-level data and default to FALSE when isCounty is TRUE
    if (makeTotalvsPerCapitaPlots) {
        helperCallTotalPerCapita(dfPlot=dfPlot, thruLabel=thruLabel)
    }
    
    # Call the helper function to make recent vs. total plots
    if (makeRecentvsTotalPlots) {
        helperCallRecentvsTotal(dfPlot=dfPlot, 
                                thruLabel=thruLabel, 
                                labelPoints=!isCounty, 
                                recentTotalTogether = recentTotalTogether
                                )
    }
    
    # Call the total vs. elements helper function
    if (makeTotalvsElementPlots) {
        helperCallTotalvsElements(dfPlot=dfPlot, 
                                  aggAndTotal=!isCounty, 
                                  clusterAggTogether=clusterPlotsTogether
                                  )
    }
    
    # Return the plotting data frame
    dfPlot

}



# Takes a list and creates the desired plot
assessFunction <- function(x) {
    helperAssessCountyClusters(x$objCluster$cluster, 
                               dfPop=countyFiltered, 
                               dfBurden=countyFiltered, 
                               thruLabel="Sep 3, 2020", 
                               plotsTogether=TRUE, 
                               makeTotalvsPerCapitaPlots=FALSE, 
                               makeRecentvsTotalPlots=FALSE, 
                               makeTotalvsElementPlots=FALSE, 
                               showMap=FALSE
                               )
}



# Helper function for pretty labeling and position of bar chart labels
helperTextLabel <- function(vrbl, 
                            groupVar=NULL, 
                            groupValue=NULL, 
                            divBy=1, 
                            roundTo=0, 
                            pctMin=0.1,
                            pctOut=0.025, 
                            ...
                            ) {
    
    # FUNCTION ARGUMENTS
    # vrbl: the variable to be plotted
    # groupVar: the grouping variable for deciding whether to use middle of the bar or outside
    #           NULL means no grouping variable
    # groupValue: the value to be used for groupVar
    # divBy: the amount to divide the variable by (e.g., 1000 means y-axis will be in 000s)
    # roundTo: the amount to round the displayed totals to
    # pctMin: any amount above this will be centered, any amount below this will be outside
    # pctOut: amounts on the outside will be this amount of maximum outside
    # ...: any other arguments to pass "as is" to geom_text
    
    # Create a relevant geom_text object
    # 1. label should be vrbl/divBy, rounded to roundTo and using commas as needed
    # 2. y position should be centered midpoint OR left-justified end of bar
    
    geom_text(data=if(!is.null(groupVar)) ~filter(., get(groupVar) %in% groupValue) else ~., 
              aes(y=ifelse(get(vrbl)>=pctMin*max(get(vrbl)), 
                           get(vrbl)/2/divBy, 
                           get(vrbl)/divBy + pctOut*max(get(vrbl))/divBy
                           ), 
                  label=scales::comma(get(vrbl)/divBy, accuracy=10**-roundTo), 
                  hjust=ifelse(get(vrbl)>=pctMin*max(get(vrbl)), NA, 0)
                  ), 
              ...
              )
    
}



# Function to run the data for a given state(s)
stateCountySummary <- function(states, 
                               df=fullStateData, 
                               keyDate=as.Date("2020-08-31"), 
                               showQuadrants=TRUE,
                               showCumulative=FALSE,
                               facetCumulativeByState=FALSE,
                               showAllFactorLevels=FALSE,
                               returnData=FALSE
                               ) {
    
    # FUNCTION ARGUMENTS:
    # states: the states to include in the analysis
    # df: the data frame or tibble to use
    # keyDate: date to use for getting cluster and population by county
    # showQuadrants: boolean, whether to create and show the four-quadrants of p1-p4
    # showCumulative: boolean, whether to show the cumulative deaths per capita by segment
    # facetCumulativeByState: boolean, whether to facet by state for the cumulative plot
    # showAllFactorLevels: boolean, whether to show all factor levels in the plot legend
    # returnData: boolean, should the data frame be returned?
    
    # Create a base data frame
    baseStateData <-df %>%
        arrange(fipsCounty, date) %>%
        group_by(fipsCounty) %>%
        mutate(cumDeaths=cumsum(deaths)) %>%
        filter(state %in% c(states))
    
    # Create a frame by state and cluster
    stateClusterData <- baseStateData %>%
        group_by(state, cluster, date) %>%
        summarize(n=n(), dpm7=sum(dpm7*pop)/sum(pop), cumDeaths=sum(cumDeaths), pop=sum(pop)) %>%
        ungroup()
    
    # Create a frame by cluster
    clusterData <- baseStateData %>%
        group_by(cluster, date) %>%
        summarize(n=n(), dpm7=sum(dpm7*pop)/sum(pop), cumDeaths=sum(cumDeaths), pop=sum(pop)) %>%
        ungroup()
    
    # Create and show p1-p4 if requested
    if (showQuadrants) {
        
        # Plot 1: Number of counties in cluster
        p1 <- clusterData %>%
            filter(date==keyDate) %>%
            ggplot(aes(x=fct_rev(cluster), y=n, fill=cluster)) + 
            geom_col() + 
            geom_text(aes(y=n/2, label=n)) +
            coord_flip() + 
            labs(x="Cluster", y="# Counties", title="Counties by Cluster")
        if (showAllFactorLevels) p1 <- p1 + scale_fill_discrete(drop=FALSE)
        
        # Plot 2: Total population and average population by cluster
        p2 <- clusterData %>%
            filter(date==keyDate) %>%
            mutate(popPer=pop/n) %>%
            select(cluster, pop, popPer) %>%
            pivot_longer(-cluster) %>%
            ggplot(aes(x=fct_rev(cluster), y=value/1000, fill=cluster)) + 
            geom_col() + 
            helperTextLabel("value", divBy=1000, groupVar="name", groupValue="popPer", pctMin=0.2, size=3.5) +
            helperTextLabel("value", divBy=1000, groupVar="name", groupValue="pop", pctMin=0.2, size=3.5) + 
            coord_flip() + 
            facet_wrap(~c("pop"="Population", "popPer"="Population per county")[name], scales="free_x") +
            labs(x="Cluster", y="Population (000)", title="Population by Cluster")
        if (showAllFactorLevels) p2 <- p2 + scale_fill_discrete(drop=FALSE)
        
        # Plot 3: Deaths and deaths per million (total) by cluster
        p3 <- clusterData %>%
            filter(date==keyDate) %>%
            mutate(dpm=1000000*cumDeaths/pop) %>%
            select(cluster, cumDeaths, dpm) %>%
            pivot_longer(-cluster) %>%
            ggplot(aes(x=fct_rev(cluster), y=value, fill=cluster)) + 
            geom_col() + 
            helperTextLabel("value", groupVar="name", groupValue="cumDeaths", pctMin=0.2, size=3.5) +
            helperTextLabel("value", groupVar="name", groupValue="dpm", pctMin=0.2, size=3.5) + 
            coord_flip() + 
            facet_wrap(~c("cumDeaths"="1. Total deaths", "dpm"="2. Deaths per million")[name], 
                       scales="free_x"
                       ) +
            labs(x="Cluster", y="", title="Deaths (total and per million) by cluster")
        if (showAllFactorLevels) p3 <- p3 + scale_fill_discrete(drop=FALSE)
        
        # Plot 4: Evolution of deaths by time
        p4 <- clusterData %>%
            filter(!is.na(dpm7)) %>%
            ggplot(aes(x=date, y=dpm7, group=cluster, color=cluster)) + 
            geom_line(size=1) + 
            labs(x="", y="Deaths per million", title="Deaths per million, 7-day rolling mean") + 
            scale_x_date(date_breaks="1 months", date_labels="%b")
        if (showAllFactorLevels) p4 <- p4 + scale_color_discrete(drop=FALSE)
        
        # Create a 2x2 summary of the cluster data
        gridExtra::grid.arrange(p1, p2, p3, p4, nrow=2, top=paste0("States: ", paste0(states, collapse=", ")))

    }

    # If requested, create a cumulative deaths per capita curve
    if (showCumulative) {
        p5 <- (if (facetCumulativeByState) stateClusterData else clusterData) %>%
            mutate(cumdpm=1000000*cumDeaths/pop) %>%
            ggplot(aes(x=date, y=cumdpm)) +
            geom_line(aes(group=cluster, color=cluster), lwd=1) + 
            geom_text(data=~filter(., date==max(date, na.rm=TRUE)), 
                      aes(x=date+lubridate::days(3), label=scales::comma(cumdpm, accuracy=1), color=cluster), 
                      size=3, 
                      hjust=0
                      ) + 
            labs(x="", 
                 y="Cumulative deaths per million", 
                 title=paste0("Cumulative deaths per million for: ", paste0(states, collapse=", "))
                 ) + 
            scale_x_date(date_breaks="1 months", date_labels="%b", expand=expand_scale(c(0, 0.15)))
        if (facetCumulativeByState) p5 <- p5 + facet_wrap(~state)
        if (showAllFactorLevels) p5 <- p5 + scale_color_discrete(drop=FALSE)
        print(p5)
        
    }
    
    # Return the data file if requested (by state-cluster if facetCumulativeByState, by cluster otherwise)
    if (returnData) {
        if (facetCumulativeByState) stateClusterData else stateClusterData
    }
    
}



# Function to reorder and relabel factors
changeOrderLabel <- function(df, 
                             fctVar="cluster",
                             grpVars=c(),
                             burdenVar="dpm", 
                             wgtVar="pop",
                             exclfct="999"
                             ) {
    
    # FUNCTION ARGUMENTS
    # df: the data frame
    # fctVar: the factor variable
    # grpVars: the variable that the data are aurrently at (e.g., "fipsCounty" for county-level in df)
    # burdenVar: the disease burden variable for sorting
    # wgtVar: the weight variable for sorting
    # exclfct: the factor lele to be excluded from analysis
    
    # General approach
    # 1. Data are aggregated to c(fctVar, grpVars) as x=sum(burdenVar*wgtVar) and y=mean(wgtVar)
    # 2. Data are aggregated to fctVar as z=sum(x)/sum(y)
    # 3. Factors are reordered from high to low on z, with the excluded factor added back last (if it exists)
    
    # Check if exclfct exists in the factor variable
    fctDummy <- exclfct %in% levels(df[, fctVar, drop=TRUE])
    
    # Create the summary of impact by segment
    newLevels <- df %>%
        filter(get(fctVar) != exclfct) %>%
        group_by_at(vars(all_of(c(fctVar, grpVars)))) %>%
        summarize(x=sum(get(burdenVar)*get(wgtVar)), y=mean(get(wgtVar))) %>%
        group_by_at(vars(all_of(fctVar))) %>%
        summarize(z=sum(x)/sum(y)) %>%
        arrange(-z) %>%
        pull(fctVar) %>%
        as.character()
    
    # Add back the dummy factor at the end (if it exists)
    if (fctDummy) newLevels <- c(newLevels, exclfct)
    
    # Reassign the levels in df
    df %>% 
        mutate(!!fctVar:=factor(get(fctVar), levels=newLevels, labels=newLevels))
    
}



# Create the cluster-state data
helperMakeClusterStateData <- function(dfPlot, 
                                       dfPop=usmap::countypop,
                                       dfBurden=countyDailyPerCapita, 
                                       orderCluster=FALSE
                                       ) {
    
    # FUNCTION ARGUMENTS:
    # dfPlot: the raw plotting data (which can have factors already reordered)
    # dfPop: source for full county data with population
    # dfBurden: source for disease burden by geography
    # orderCluster: boolean, whether to order the cluster factor order by burden

    # Merge in the counties that were previously excluded from segmentation
    df <- dfPlot %>%
        filter(date==max(date)) %>%
        select(fipsCounty=state, cluster) %>%
        mutate(fipsCounty=stringr::str_pad(fipsCounty, width=5, side="left", pad="0")) %>%
        right_join(select(dfPop, fipsCounty=fips, state=abbr, countyName=county, pop=pop_2015)) %>%
        mutate(cluster=factor(ifelse(is.na(cluster), 999, as.character(cluster))))
    
    # Merge in the disease data
    df <- dfBurden %>%
        mutate(fipsCounty=stringr::str_pad(state, width=5, side="left", pad="0")) %>%
        select(-state, -population) %>%
        right_join(df)
    
    # Order if requested
    if (orderCluster) df <- changeOrderLabel(df, grpVars="fipsCounty")
    
    # Return the relevant frame
    df
    
}



# Helper function to read and convert 
helperReadConvert <- function(file, 
                              valueName
                              ) {
    
    # FUNCTION ARGUMENTS:
    # file: the file for reading and converting
    # valueName: name for the values column of the pivoted data
    
    # Read file
    df <- readr::read_csv(file)
    glimpse(df)

    # Conversion of the raw data
    dfPivot <- df %>%
        rename(countyName=`County Name`, state=State) %>%
        pivot_longer(-c(countyFIPS, countyName, state, stateFIPS), 
                     names_to="date", 
                     values_to=valueName
                     ) %>%
        mutate(date=lubridate::mdy(date))
    glimpse(dfPivot)

    # Conversion of the pivoted data
    dfConverted <- countyLevelEvolution(dfPivot, 
                                        burdenVar=valueName, 
                                        inclStates=NULL, 
                                        topN=5,
                                        printPlot=FALSE, 
                                        returnData=TRUE
                                        )

    # Return the converted file
    dfConverted
    
}



# Function to read and convert raw data from USA Facts
readUSAFacts <- function(caseFile, 
                         deathFile, 
                         oldFile=NULL,
                         showBurdenMinPop=NULL,
                         maxDate=NULL,
                         stateClusters=NULL, 
                         countyClusters=NULL
                         ) {
    
    # FUNCTION ARGUMENTS:
    # caseFile: the location of the downloaded cases dataset
    # deathsFile: the location of the downloaded deaths dataset
    # oldFile: a file for comparing control totals against (NULL means do not compare)
    # showBurdenMinPop: minimum population for showing burden by cluster (NULL means skip plot)
    # maxDate: the date to use for the burden by cluster plot (ignored unless showBurdenMinPop is not NULL)
    # stateClusters: a field 'cluster' will be created from state using this named vector
    #                NULL means do not use state for finding clusters
    # countyClusters: a field 'cluster' will be created from countyFIPS using this named vector
    #                 NULL means do not use county for finding clusters
    # If both stateClusters and countyClusters are passed, only stateClusters will be used
    
    # Read cases file
    cnvCases <- helperReadConvert(caseFile, valueName="cumCases")
    
    # Read deaths file
    cnvDeaths <- helperReadConvert(deathFile, valueName="cumDeaths")

    # Join the files so there is countyFIPS-county-population-date and cumCases cumDeaths
    # Also, add the state segments as 'cluster' if requested
    dfBurden <- cnvDeaths %>% 
        select(-countyName.x, -countyName.y, -bold, cumDeathPer=burden) %>%
        inner_join(cnvCases %>% 
                       select(-countyName.x, -countyName.y, -bold, cumCasesPer=burden), 
                   by=c("countyFIPS", "stateFIPS", "county", "state", "date", "population")
                   ) %>%
    mutate(cluster=if(is.null(stateClusters)) NA else stateClusters[state])

    # Attach county clusters if requested and if state clusters were not attached
    # County cluster file converted to 5-digit character
    str_pad5 <- function(x) stringr::str_pad(x, width=5, side="left", pad="0")
    if (is.null(stateClusters) & !is.null(countyClusters)) {
        names(countyClusters) <- str_pad5(as.character(names(countyClusters)))
        dfBurden <- dfBurden %>%
            mutate(cluster=countyClusters[str_pad5(countyFIPS)])
    }

    # Compare against an old file, if requested
    if (!is.null(oldFile)) {
        p1 <- bind_rows(oldFile, dfBurden, .id="source") %>%
            mutate(source=factor(case_when(source==1 ~ "Previous", source==2 ~ "New", TRUE ~ "Unknown"), 
                                 levels=c("New", "Previous", "Unknown")
                                 )
                   ) %>%
            group_by(source, date) %>%
            summarize(cumDeaths=sum(cumDeaths), cumCases=sum(cumCases)) %>%
            pivot_longer(-c(source, date)) %>%
            ggplot(aes(x=date, y=value/1000, group=source, color=source)) + 
            geom_line() + 
            facet_wrap(~c("cumCases"="Cases", "cumDeaths"="Deaths")[name], scales="free_y") + 
            scale_x_date(date_breaks="1 months", date_labels="%m") + 
            labs(y="Burden (000s)", title="US National Coronavirus Burden by Source")
        print(p1)
    }
    
    if (!is.null(showBurdenMinPop)) {
        plotBurdenData(dfBurden, maxDate=maxDate, minPop=showBurdenMinPop)
    }
    
    # Return the burdens file
    dfBurden
    
}



# Function to plot the burden by cluster
plotBurdenData <- function(df, 
                           maxDate, 
                           minPop
                           ) {
    
    # FUNCTION ARGUMENTS:
    # df: the burden data
    # maxDate: the maximum date
    # minPop: the minimum population

    # Create a date label
    dateLabel <- format(as.Date(maxDate), "%b %d, %Y")
    
    # Plot the data as of maxDate
    p1 <- df %>%
        filter(date==maxDate, population>=minPop) %>%
        ggplot(aes(x=cumCasesPer, y=cumDeathPer)) + 
        geom_point(aes(size=log10(population), color=factor(cluster)), alpha=0.25) + 
        geom_smooth(method="lm", se=FALSE, aes(weight=population, color=factor(cluster))) +
        scale_size_continuous("log10\nCounty\nPop") + 
        labs(x=paste0("Cases per million as of ", dateLabel), 
             y=paste0("Deaths per million as of ", dateLabel), 
             title=paste0("County-level per-capita coronavirus burden as of ", dateLabel), 
             subtitle=paste0("Filtered to counties with population of at least ", minPop)
             ) + 
        facet_wrap(~cluster)
    print(p1)
    
    # Further, the total burden by cluster is plotted
    p2 <- df %>%
        filter(date==maxDate, population>=0) %>%
        group_by(cluster) %>%
        summarize(population=sum(population), 
                  cumCases=sum(cumCases), 
                  cumDeath=sum(cumDeaths), 
                  mdn_CasesPer=median(cumCasesPer), 
                  mdn_DeathPer=median(cumDeathPer)
                  ) %>%
        mutate(mean_CasesPer=1000000*cumCases/population, mean_DeathPer=1000000*cumDeath/population) %>%
        select(cluster, starts_with("mdn"), starts_with("mean")) %>%
        pivot_longer(-cluster) %>%
        mutate(aggType=stringr::str_replace(name, "_.*", ""), 
               metType=stringr::str_replace(name, ".*_", "")
               ) %>%
        pivot_wider(c(cluster, aggType), names_from="metType", values_from="value") %>%
        ggplot(aes(x=CasesPer, y=DeathPer)) + 
        geom_point(aes(color=factor(cluster)), size=5) + 
        labs(x=paste0("Cases per million as of ", dateLabel), 
             y=paste0("Deaths per million as of ", dateLabel), 
             title=paste0("Cluster-level per-capita coronavirus burden as of ", dateLabel), 
             subtitle="State-level clusters based on hierarchical (method=`complete`)"
             ) + 
        scale_color_discrete("Cluster") +
        ylim(c(0, NA)) + 
        xlim(c(0, NA)) + 
        facet_wrap(~c("mdn"="Median of all counties in segment", "mean"="Segment-level metric")[aggType])
    print(p2)

}



# Create the filtered county data frame
createCountyFiltered <- function(df, 
                                 maxDate, 
                                 minPop
                                 ) {
    
    # FUNCTION ARGUMENTS:
    # df: data frame with the burden data
    # maxDate: the last date to use
    # minPop: the minimum population to use
    
    # STEP 1: Select only desired variables from df
    df <- df %>%
        select(state=countyFIPS, date, cpm=cumCasesPer, dpm=cumDeathPer, population) %>%
        arrange(state, date)
    
    # STEP 1a: Confirm that there are no duplicates and that every county has the same dates
    # This should be 1 provided that there are no duplicates
    cat("\nThis should be 1, otherwise there are duplicates\n")
    df %>% 
        count(state, date) %>% 
        pull(n) %>% 
        max() %>%
        print()
    cat("\n\n\n")

    # This should have no standard deviation if the same number of records exist on every day
    cat("\nMin and max should be the same if there are records everywhere on every day\n")
    df %>%
        mutate(n=1) %>%
        group_by(date) %>%
        summarize(n=sum(n), population=sum(population)) %>%
        summarize_at(vars(all_of(c("n", "population"))), .funs=list(min=min, max=max)) %>%
        print()
    cat("\n\n\n")
    
    # STEP 2: Convert to daily new totals rather than cumulative data
    dfDaily <- df %>%
        group_by(state) %>%
        arrange(date) %>%
        mutate_at(vars(all_of(c("cpm", "dpm"))), ~ifelse(row_number()==1, ., .-lag(.))) %>%
        ungroup()

    # STEP 2a: Add rolling 7 aggregates and total cases/deaths
    dfDaily <- dfDaily %>%
        arrange(state, date) %>%
        group_by(state) %>%
        helperRollingAgg(origVar="cpm", newName="cpm7", k=7) %>%
        helperRollingAgg(origVar="dpm", newName="dpm7", k=7) %>%
        ungroup() %>%
        mutate(cases=cpm*population/1000000, deaths=dpm*population/1000000)

    # STEP 3: Filter the data frame for date and minimum population
    # Ensure that 'state' (which holds countyFIPS) is not summed as a double
    dfFiltered <- dfDaily %>%
        filter(population >= minPop, date <= as.Date(maxDate)) %>%
        mutate(state=as.character(state))

    # Check number of counties that will fail the test for 100 deaths per million or 5000 cases per million
    is0 <- function(x) mean(x==0)
    isltn <- function(x, n) mean(x<n)
    islt100 <- function(x) isltn(x, n=100)
    islt5000 <- function(x) isltn(x, n=5000)

    dfFiltered %>% 
        group_by(state) %>% 
        summarize_at(c("cpm", "dpm"), sum) %>% 
        ungroup() %>%
        summarize_at(vars(all_of(c("cpm", "dpm"))), 
                     .funs=list(mean_is0=is0, mean_lt100=islt100, mean_lt5000=islt5000)
                     ) %>%
        print()
    
    # Return the data file
    dfFiltered
    
}



helperCorrel <- function(lagDays, df=clusterMarchMay, x="cpm7", y="dpm7") {
    df %>%
        group_by(cluster) %>%
        arrange(date) %>%
        mutate(xlag=lag(get(x), lagDays)) %>%
        summarize(corr=cor(xlag, get(y), use="complete.obs")) %>%
        mutate(lag=lagDays) %>%
        ungroup()
}



# Helper function to make normalized data for cases and deaths
helperMakeNormData <- function(df, 
                               aggBy="cluster", 
                               plotData=TRUE
                               ) {

    # FUNCTION ARGUMENTS:
    # df: data frame or tibble
    # aggBy: variable for aggregation for the normalized data
    # plotData: boolean, whether to plot the data
    
    # Create the normalized data
    normData <- df %>%
        group_by_at(vars(all_of(c(aggBy, "date")))) %>%
        summarize(cpm7=sum(pop*cpm7)/sum(pop), dpm7=sum(pop*dpm7)/sum(pop), pop=sum(pop)) %>%
        group_by_at(vars(all_of(aggBy))) %>%
        mutate(caseNorm=100*cpm7/max(cpm7, na.rm=TRUE), deathNorm=100*dpm7/max(dpm7, na.rm=TRUE)) %>%
        ungroup()
    
    # Plot the normalized data (if requested)
    if (plotData) {
        # Create the plotting object
        p1 <- normData %>%
            select(all_of(aggBy), date, caseNorm, deathNorm) %>%
            pivot_longer(-c(all_of(aggBy), "date")) %>%
            filter(!is.na(value)) %>%
            ggplot(aes(x=date, 
                       y=value, 
                       color=c("deathNorm"="Normalized Death", "caseNorm"="Normalized Cases")[name], 
                       group=name
                       )
                   ) + 
            geom_line() + 
            facet_wrap(~get(aggBy)) + 
            scale_color_discrete("Metric") + 
            scale_x_date(date_breaks="1 months", date_labels="%m") + 
            labs(x="2020", 
                 y="Normalized Burden", 
                 title="Burden by Segment", 
                 subtitle="Normalized (100 is segment maximum for metric)"
                 )
        print(p1)
    }

    # Return the normalized data
    normData
    
}



# Helper function to assess the correlations by lag
helperCorrel <- function(lagDays, df, aggBy="cluster", x="cpm7", y="dpm7") {
    df %>%
        group_by_at(vars(all_of(aggBy))) %>%
        arrange(date) %>%
        mutate(xlag=lag(get(x), lagDays)) %>%
        summarize(corr=cor(xlag, get(y), use="complete.obs")) %>%
        mutate(lag=lagDays) %>%
        ungroup()
}



# Function to test lags and produce subsetted frame
helperTestLags <- function(df,
                           minDate=NULL, 
                           maxDate=NULL,
                           aggBy="cluster", 
                           maxRatio=1,
                           minRatio=-0.05,
                           plotData=TRUE
                           ) {
    
    # FUNCTION ARGUMENTS:
    # df: data frame or tibble
    # minDate: the minimum date to use for analysis (NULL means use data minimum)
    # maxDate: the maximum date to use for analysis (NULL means use data minimum)
    # aggBy: the aggregation level for the input data in df
    # maxRatio: the maximum ratio of deaths to cases (flat-line at this ratio if above)
    # minRatio: the minimum ratio of deaths to cases (flat-line at this ratio if below)
    # plotData: boolean, whether to plot the subset of the data
    
    # Get the minimum and/or maximum date from the data if passed as NULL
    if (is.null(minDate)) minDate <- df %>% pull(date) %>% min(na.rm=TRUE) else minDate <- as.Date(minDate)
    if (is.null(maxDate)) maxDate <- df %>% pull(date) %>% max(na.rm=TRUE) else maxDate <- as.Date(maxDate)
    
    # Filter the data to only the desired columns and time periods
    filterData <- df %>%
        select(all_of(aggBy), date, cpm7, dpm7) %>%
        filter(!is.na(cpm7), !is.na(dpm7), date>=minDate, date<=maxDate)

    # Create the plot data if requested
    if (plotData) {
        p1 <- filterData %>%
            pivot_longer(-c(all_of(aggBy), date)) %>%
            group_by_at(vars(all_of(c(aggBy, "name")))) %>%
            mutate(normValue=100*value/max(value)) %>%
            filter(!is.na(normValue)) %>%
            ggplot(aes(x=date, 
                       y=normValue, 
                       color=c("dpm7"="Normalized Death", "cpm7"="Normalized Cases")[name], 
                       group=name
                       )
                   ) + 
            geom_line() + 
            facet_wrap(~get(aggBy)) + 
            scale_color_discrete("Metric") + 
            scale_x_date(date_breaks="1 months", date_labels="%m") + 
            labs(x="2020", 
                 y="Normalized Burden", 
                 title=paste0("Burden by Segment (", 
                              format(minDate, "%b %d"), 
                              " - ", 
                              format(maxDate, "%b %d"),
                              ")"
                              ), 
                 subtitle="Normalized (100 is segment maximum for metric during time period)"
                 )
        print(p1)
    }

    # Test the various lags for correlations
    corrLagEarly <- map_dfr(.x=0:30, .f=helperCorrel, df=filterData, aggBy=aggBy)
    
    # Plot of correlations (if requested)
    if (plotData) {
        p2 <- corrLagEarly %>%
            ggplot(aes(x=lag, y=corr)) +
            geom_line(aes(group=get(aggBy), color=get(aggBy))) + 
            facet_wrap(~get(aggBy)) + 
            labs(x="Lag (days) for cases", 
                 y="Correlation to deaths", 
                 title="Correlation of lagged cases and deaths"
                 ) + 
            guides(color=FALSE)
        print(p2)
    }

    # Find the best lags for each aggBy
    bestLag <- corrLagEarly %>%
        group_by_at(vars(all_of(aggBy))) %>%
        filter(corr==max(corr)) %>%
        ungroup()

    cat("\nThe best lags are:\n")
    print(bestLag)

    # Create the lagged database
    lagData <- filterData %>%
        inner_join(bestLag, by=aggBy) %>%
        group_by_at(vars(all_of(aggBy))) %>%
        arrange(date) %>%
        mutate(cpmlag=lag(cpm7, min(lag)))
    
    # Create the plot of the lagged database
    if (plotData) {
        p3 <- lagData %>%
            filter(!is.na(cpmlag)) %>%
            mutate(ratCur=pmax(minRatio, pmin(maxRatio, dpm7/cpmlag)), 
                   ratCum=pmax(minRatio, pmin(maxRatio, cumsum(dpm7)/cumsum(cpmlag)))
                   ) %>%
            ggplot(aes(x=date, group=get(aggBy), color=get(aggBy))) + 
            geom_line(aes(y=ratCum)) +
            geom_line(aes(y=ratCur), lty=2) + 
            labs(x="", 
                 y="Cumulative Death vs. Case Ratio", 
                 title="Deaths vs. Cases with Lag (Subsetted Time Period)", 
                 subtitle="Solid line is cumulative, dashed line is current time period",
                 caption=paste0("Ratio of death to cases capped between ", 
                                round(maxRatio, 3), 
                                " and ", 
                                round(minRatio, 3),
                                " for plotting")
                 ) + 
            geom_text(data=~filter(., date==max(date)), 
                      aes(label=paste0(round(100*ratCum, 1), "%"), 
                          y=ratCum, 
                          x=date+lubridate::days(3)
                          ), 
                      hjust=0
                      ) +
            scale_x_date(date_breaks="1 months", date_labels="%m", expand=expand_scale(mult=c(0, 0.2))) + 
            guides(color=FALSE) +
            facet_wrap(~get(aggBy))
        print(p3)
    }
    
    # Return the lagged data
    lagData
    
}



# Explore the top-n counties by population or dpm in a subset of states and/or clusters
exploreTopCounties <- function(df, 
                               minDate, 
                               maxDate,
                               minPop=100000, 
                               states=state.abb, 
                               clusters=df %>% pull(cluster) %>% unique(),
                               topNBy=c(),
                               nVar="pop",
                               nKey=9, 
                               plotData=TRUE
                               ) {

    # FUNCTION ARGUMENTS:
    # df: the data frame or tibble containing county (as variable 'state'), cluster, pop, date, dpm, cpm
    # minDate: minimum date for lags
    # maxDate: maximum date for lags
    # states: the states for filtering
    # clusters: the clusters to be used for filtering
    # topNBy: variable(s) for calculating top-n (nKey by nVar) by; pass variable names quoted for group_by_at
    #         c() means that the global top-n will be calculated (nKey by nVar)
    # minPop: minimum population for including as a top county
    # nVar: the variable to use for top-n (should be "pop" or "dpm")
    # nKey: the number of counties to pull for top-N
    # plotData: whether to create plots for the data
    
    minDate <- as.Date(minDate)
    maxDate <- as.Date(maxDate)
    
    # Extract a list of the heaviest hit counties with population 1000k+
    ctyFilter <- df %>%
        mutate(state=str_pad(state, width=5, side="left", pad="0")) %>%
        inner_join(select(usmap::countypop, -pop_2015), by=c("state"="fips")) %>%
        mutate(countyName=paste0(cluster, " - ", 
                                 stringr::str_replace(county, "County|Parish", "("), 
                                 abbr, 
                                 ")"
                                 )
               ) %>%
        filter(pop >= minPop, abbr %in% states, cluster %in% clusters) %>%
        group_by(state, cluster, countyName) %>%
        summarize(dpm=sum(dpm), pop=mean(pop)) %>%
        ungroup() %>%
        group_by_at(vars(all_of(topNBy))) %>%
        top_n(n=nKey, wt=get(nVar)) %>%
        ungroup() %>%
        arrange(desc(get(nVar)))

    # Display the list of key counties
    cat("\n*** KEY COUNTIES INCLUDE: ***\n")
    print(ctyFilter)
    
    # Keep only the key counties and normalize the data
    cNorm_key <- df %>%
        mutate(state=str_pad(state, width=5, side="left", pad="0")) %>%
        inner_join(select(ctyFilter, state, countyName), by=c("state"="state")) %>%
        helperMakeNormData(aggBy=c("countyName", "state", "cluster"), plotData=plotData)

    # Create lags for given time period
    lag_key <- helperTestLags(cNorm_key, 
                              minDate=minDate, 
                              maxDate=maxDate, 
                              aggBy=c("countyName", "state", "cluster"), 
                              maxRatio=0.25, 
                              plotData=plotData
                              )

    # Return the lagged data
    lag_key
    
}



# Function to read and pivot input data from USA Facts
readPivotUSAFacts <- function(popFile, 
                              caseFile, 
                              deathFile, 
                              unassignedDate
                              ) {
    
    # FUNCTION ARGUMENTS:
    # popFile: population file
    # caseFile: case file
    # deathFile: deaths file
    # unassignedDate: date to be used for extracting unassigned summaries (generally, last complete date)
    
    # Read population file
    pop <- readr::read_csv(popFile) %>%
        rename(countyName=`County Name`, state=State)    
    
    # Function for reading and pivoting data
    helperReadPivot <- function(x, valueName) {
        readr::read_csv(x) %>%
            rename(countyName=`County Name`, state=State) %>%
            pivot_longer(-c(countyFIPS, countyName, state, stateFIPS), 
                         names_to="date", 
                         values_to=valueName
                         ) %>%
            mutate(date=lubridate::mdy(date))
    }
    
    # Read cases file
    cases <- helperReadPivot(caseFile, valueName="cumCases")
    
    # Read deaths file
    deaths <- helperReadPivot(deathFile, valueName="cumDeaths")

    # Assess the extent of the unassigned data issue
    unassigned <- cases %>%
        mutate(unassigned=ifelse(countyFIPS==0, 1, 0)) %>%
        group_by(state, date, unassigned) %>%
        summarize(cases=sum(cumCases)) %>%
        ungroup() %>%
        full_join(deaths %>% 
                      mutate(unassigned=ifelse(countyFIPS==0, 1, 0)) %>% 
                      group_by(state, date, unassigned) %>% 
                      summarize(deaths=sum(cumDeaths)) %>% 
                      ungroup()
                  )
    
    # Create plot for the unassigned data
    p1 <- unassigned %>%
        filter(date==unassignedDate) %>%
        pivot_longer(c("cases", "deaths")) %>%
        group_by(state, name) %>%
        summarize(total=sum(value), unass=sum(value*unassigned)/sum(value)) %>%
        ggplot(aes(x=fct_reorder(state, unass*ifelse(name=="cases", 1, 0)), y=unass)) + 
        geom_point() + 
        geom_text(data=~filter(., (name=="cases" & unass >= 0.02) | (name=="deaths" & unass > 0.005)), 
                  aes(y=unass + ifelse(name=="cases", -0.0025, -0.001), 
                      label=paste0(round(100*unass, 1), "% (", state ,")")
                      ), 
                  size=3, 
                  hjust=1
                  ) +
        coord_flip() + 
        labs(x="", 
             y=paste0("% Unassigned as of ", as.character(unassignedDate)), 
             title="Unassigned percentage by state"
             ) +
        facet_wrap(~name, scales="free_x")
    print(p1)
    
    # Return list containing the three key files
    list(pop=pop, cases=cases, deaths=deaths, unassigned=unassigned)
    
}



# STEP 4: Create county-level clusters
prepClusterCounties <- function(burdenFile, 
                                maxDate,
                                minPop, 
                                createClusters=TRUE,
                                hierarchical=FALSE, 
                                returnList=!hierarchical, 
                                ...
                                ) {
    
    # FUNCTION ARGUMENTS:
    # burdenFile: the pivoted file containing the burdens data
    # maxDate: the latest date to be used from the data
    # minPop: the smallest population for a county to be included
    # createClusters: boolean, whether to create county-level clusters (if FALSE, object returned as NULL)
    #                 FALSE would be for prepping and converting data only (those objects are returned)
    # hierarchical: whether to create hierarchical clusters (only set up for FALSE as of now)
    # returnList: whether the clustering call returns a list (only set up for TRUE as of now)
    # ...: other arguments to be passed to clusterStates
    
    # STEP 1: Select only desired variables from burden file
    countyCumPerCapita <- burdenFile %>%
        select(state=countyFIPS, date, cpm=cumCasesPer, dpm=cumDeathPer, population) %>%
        arrange(state, date)

    # STEP 2: Confirm that there are no duplicates and that every county has the same dates
    # This should be 1 provided that there are no duplicates
    countyCumPerCapita %>% 
        count(state, date) %>% 
        pull(n) %>% 
        max()

    # This should have no standard deviation if the same number of records exist on every day
    countyCumPerCapita %>%
        mutate(n=1) %>%
        group_by(date) %>%
        summarize(n=sum(n), population=sum(population)) %>%
        summarize_at(vars(all_of(c("n", "population"))), .funs=list(sd=sd, min=min, max=max))

    # STEP 3: Convert to daily new totals rather than cumulative data
    countyDailyPerCapita <- countyCumPerCapita %>%
        group_by(state) %>%
        arrange(date) %>%
        mutate_at(vars(all_of(c("cpm", "dpm"))), ~ifelse(row_number()==1, ., .-lag(.))) %>%
        ungroup()

    # STEP 4: Add rolling 7 aggregates and total cases/deaths
    countyDailyPerCapita <- countyDailyPerCapita %>%
        arrange(state, date) %>%
        group_by(state) %>%
        helperRollingAgg(origVar="cpm", newName="cpm7", k=7) %>%
        helperRollingAgg(origVar="dpm", newName="dpm7", k=7) %>%
        ungroup() %>%
        mutate(cases=cpm*population/1000000, deaths=dpm*population/1000000)

    # STEP 5: Filter the data prior to clustering
    countyFiltered <- countyDailyPerCapita %>%
        filter(population >= minPop, date <= as.Date(maxDate)) %>%
        mutate(state=as.character(state))

    # STEP 6: Check the implications of the filtering
    # Check number of counties that will fail the test for 100 deaths per million or 5000 cases per million
    is0 <- function(x) mean(x==0)
    isltn <- function(x, n) mean(x<n)
    islt100 <- function(x) isltn(x, n=100)
    islt5000 <- function(x) isltn(x, n=5000)

    countyFiltered %>% 
        group_by(state) %>% 
        summarize_at(c("cpm", "dpm"), sum) %>% 
        ungroup() %>%
        summarize_at(vars(all_of(c("cpm", "dpm"))), 
                     .funs=list(mean_is0=is0, mean_lt100=islt100, mean_lt5000=islt5000)
                     ) %>%
        print()

    # Run county-level clusters if requested, otherwise store as NULL
    objCluster <- if(createClusters) {
        clusterStates(countyFiltered, hierarchical=hierarchical, returnList=returnList, ...)
    } else {
        NULL
    }
    
    # Return all of the relevant objects
    list(objCluster=objCluster, 
         countyFiltered=countyFiltered, 
         countyDailyPerCapita=countyDailyPerCapita, 
         countyCumPerCapita=countyCumPerCapita
         )
    
}

Running Key Analyses (USA Facts)

The data from USA Facts can be loaded and analyzed using the functions above, a variable mapping file, and a main function that calls the other functions and returns a list:

# STEP 1a: Define the locations for the population, cases, and deaths file
popFile <- "./RInputFiles/Coronavirus/covid_county_population_usafacts.csv"
caseFile_20200903 <- "./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20200903.csv"
deathFile_20200903 <- "./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20200903.csv"
maxDate_20200903 <- "2020-08-31"

# STEP 1b: Read and pivot data from USA Facts; extract population data file as pop_usafacts
rawUSAFacts_20200903 <- readPivotUSAFacts(popFile=popFile, 
                                          caseFile=caseFile_20200903, 
                                          deathFile=deathFile_20200903, 
                                          unassignedDate=maxDate_20200903
                                          )
## Parsed with column specification:
## cols(
##   countyFIPS = col_double(),
##   `County Name` = col_character(),
##   State = col_character(),
##   population = col_double()
## )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Joining, by = c("state", "date", "unassigned")

pop_usafacts <- rawUSAFacts_20200903$pop

# STEP 2: Read case and death data (redundant), combine, and add population totals; no clusters by default
burden_20200903_new <- readUSAFacts(
    caseFile=caseFile_20200903, 
    deathFile=deathFile_20200903, 
    stateClusters=NULL
    )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 228
## $ countyFIPS    <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State         <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20`     <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20`     <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20`     <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20`     <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20`     <dbl> 0, 1, 4, 0, 0, 0, 0, 0, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20`     <dbl> 0, 4, 4, 0, 0, 1, 0, 1, 2, 10, 1, 1, 0, 0, 1, 1, 0, 1...
## $ `3/26/20`     <dbl> 0, 6, 5, 0, 0, 2, 2, 1, 2, 13, 1, 4, 1, 0, 1, 1, 0, 1...
## $ `3/27/20`     <dbl> 0, 6, 5, 0, 0, 5, 2, 1, 3, 15, 1, 7, 1, 0, 1, 3, 0, 1...
## $ `3/28/20`     <dbl> 0, 6, 10, 0, 0, 5, 2, 1, 3, 17, 1, 7, 1, 0, 2, 4, 0, ...
## $ `3/29/20`     <dbl> 0, 6, 15, 0, 0, 5, 2, 1, 3, 27, 2, 8, 1, 0, 2, 5, 0, ...
## $ `3/30/20`     <dbl> 0, 7, 18, 0, 2, 5, 2, 1, 9, 36, 2, 10, 2, 0, 2, 5, 0,...
## $ `3/31/20`     <dbl> 0, 7, 19, 0, 3, 5, 2, 1, 9, 36, 2, 11, 3, 0, 2, 5, 0,...
## $ `4/1/20`      <dbl> 0, 10, 23, 0, 3, 5, 2, 1, 11, 45, 2, 13, 4, 2, 3, 6, ...
## $ `4/2/20`      <dbl> 0, 10, 25, 0, 4, 6, 2, 1, 12, 67, 4, 14, 4, 2, 7, 6, ...
## $ `4/3/20`      <dbl> 0, 12, 28, 1, 4, 9, 2, 1, 20, 81, 5, 15, 4, 3, 8, 7, ...
## $ `4/4/20`      <dbl> 0, 12, 29, 2, 4, 10, 2, 1, 21, 87, 6, 15, 4, 7, 9, 7,...
## $ `4/5/20`      <dbl> 0, 12, 34, 2, 7, 10, 2, 1, 24, 90, 6, 18, 5, 9, 9, 7,...
## $ `4/6/20`      <dbl> 0, 12, 38, 3, 7, 10, 2, 1, 38, 96, 6, 20, 6, 9, 9, 9,...
## $ `4/7/20`      <dbl> 0, 12, 42, 3, 8, 10, 2, 2, 48, 102, 6, 20, 6, 10, 9, ...
## $ `4/8/20`      <dbl> 0, 12, 49, 3, 9, 10, 3, 3, 52, 140, 7, 22, 6, 10, 11,...
## $ `4/9/20`      <dbl> 0, 17, 59, 7, 11, 11, 4, 3, 54, 161, 7, 25, 6, 13, 11...
## $ `4/10/20`     <dbl> 0, 17, 59, 9, 11, 12, 4, 3, 54, 171, 7, 27, 7, 13, 11...
## $ `4/11/20`     <dbl> 0, 19, 66, 10, 13, 12, 4, 6, 57, 184, 7, 30, 9, 15, 1...
## $ `4/12/20`     <dbl> 0, 19, 71, 10, 16, 13, 4, 7, 60, 200, 9, 30, 10, 19, ...
## $ `4/13/20`     <dbl> 0, 19, 78, 10, 17, 15, 6, 8, 61, 212, 9, 33, 10, 19, ...
## $ `4/14/20`     <dbl> 0, 23, 87, 11, 17, 16, 8, 8, 62, 216, 9, 33, 12, 21, ...
## $ `4/15/20`     <dbl> 0, 25, 98, 13, 19, 17, 8, 11, 62, 227, 10, 37, 13, 22...
## $ `4/16/20`     <dbl> 0, 25, 102, 14, 23, 18, 8, 11, 63, 234, 11, 37, 13, 2...
## $ `4/17/20`     <dbl> 0, 25, 103, 15, 23, 20, 8, 13, 63, 236, 12, 37, 13, 2...
## $ `4/18/20`     <dbl> 0, 25, 109, 18, 26, 20, 9, 13, 66, 240, 12, 39, 14, 2...
## $ `4/19/20`     <dbl> 0, 27, 114, 20, 28, 21, 9, 14, 72, 246, 12, 42, 14, 2...
## $ `4/20/20`     <dbl> 0, 28, 117, 22, 32, 22, 11, 14, 80, 257, 12, 43, 17, ...
## $ `4/21/20`     <dbl> 0, 30, 123, 28, 32, 26, 11, 15, 83, 259, 12, 44, 18, ...
## $ `4/22/20`     <dbl> 0, 32, 132, 29, 33, 29, 11, 17, 85, 270, 12, 46, 21, ...
## $ `4/23/20`     <dbl> 0, 33, 143, 30, 33, 31, 12, 19, 88, 275, 12, 47, 22, ...
## $ `4/24/20`     <dbl> 0, 36, 147, 32, 34, 31, 12, 21, 89, 282, 12, 49, 25, ...
## $ `4/25/20`     <dbl> 0, 37, 154, 33, 35, 31, 12, 28, 90, 284, 12, 49, 27, ...
## $ `4/26/20`     <dbl> 0, 37, 161, 33, 38, 34, 12, 32, 90, 285, 14, 51, 32, ...
## $ `4/27/20`     <dbl> 0, 39, 168, 35, 42, 34, 12, 34, 90, 289, 14, 51, 39, ...
## $ `4/28/20`     <dbl> 0, 40, 171, 37, 42, 34, 12, 45, 92, 290, 15, 52, 39, ...
## $ `4/29/20`     <dbl> 0, 42, 173, 37, 42, 36, 12, 51, 93, 290, 15, 52, 39, ...
## $ `4/30/20`     <dbl> 0, 42, 174, 39, 42, 37, 13, 53, 93, 290, 15, 52, 43, ...
## $ `5/1/20`      <dbl> 0, 42, 175, 42, 42, 39, 14, 65, 93, 290, 15, 52, 49, ...
## $ `5/2/20`      <dbl> 0, 45, 181, 43, 42, 40, 14, 92, 98, 294, 15, 54, 49, ...
## $ `5/3/20`      <dbl> 0, 48, 187, 45, 42, 40, 14, 105, 105, 300, 16, 57, 49...
## $ `5/4/20`      <dbl> 0, 53, 188, 45, 42, 40, 16, 114, 105, 302, 16, 58, 51...
## $ `5/5/20`      <dbl> 0, 53, 189, 47, 43, 40, 18, 120, 114, 304, 17, 60, 54...
## $ `5/6/20`      <dbl> 0, 58, 196, 47, 43, 42, 18, 130, 114, 306, 18, 61, 54...
## $ `5/7/20`      <dbl> 0, 61, 205, 51, 44, 44, 18, 155, 120, 308, 18, 63, 56...
## $ `5/8/20`      <dbl> 0, 67, 208, 53, 44, 44, 21, 162, 123, 311, 21, 63, 59...
## $ `5/9/20`      <dbl> 0, 68, 216, 58, 45, 44, 22, 178, 124, 314, 22, 64, 61...
## $ `5/10/20`     <dbl> 0, 74, 222, 59, 46, 44, 23, 189, 124, 316, 22, 65, 66...
## $ `5/11/20`     <dbl> 0, 84, 224, 61, 46, 45, 26, 196, 125, 319, 24, 67, 67...
## $ `5/12/20`     <dbl> 0, 91, 227, 67, 46, 45, 26, 224, 126, 324, 24, 69, 69...
## $ `5/13/20`     <dbl> 0, 93, 231, 69, 46, 45, 28, 230, 127, 324, 24, 73, 72...
## $ `5/14/20`     <dbl> 0, 103, 243, 74, 46, 45, 28, 249, 128, 326, 25, 74, 7...
## $ `5/15/20`     <dbl> 0, 103, 244, 79, 49, 45, 32, 258, 129, 326, 26, 75, 8...
## $ `5/16/20`     <dbl> 0, 110, 254, 79, 50, 45, 35, 271, 130, 328, 27, 77, 8...
## $ `5/17/20`     <dbl> 0, 110, 254, 81, 50, 46, 35, 272, 130, 328, 27, 77, 8...
## $ `5/18/20`     <dbl> 0, 120, 260, 85, 50, 47, 40, 285, 133, 329, 28, 79, 8...
## $ `5/19/20`     <dbl> 0, 127, 262, 90, 51, 47, 52, 295, 133, 329, 29, 80, 9...
## $ `5/20/20`     <dbl> 0, 136, 270, 96, 52, 47, 64, 312, 136, 330, 30, 83, 1...
## $ `5/21/20`     <dbl> 0, 147, 270, 100, 52, 48, 71, 321, 136, 330, 31, 84, ...
## $ `5/22/20`     <dbl> 0, 149, 271, 104, 55, 49, 89, 329, 137, 330, 33, 85, ...
## $ `5/23/20`     <dbl> 0, 155, 273, 105, 58, 49, 105, 335, 138, 330, 33, 86,...
## $ `5/24/20`     <dbl> 0, 159, 274, 110, 59, 49, 111, 344, 141, 336, 33, 87,...
## $ `5/25/20`     <dbl> 0, 173, 276, 116, 62, 49, 141, 368, 147, 337, 33, 87,...
## $ `5/26/20`     <dbl> 0, 189, 277, 122, 66, 51, 167, 380, 150, 338, 33, 90,...
## $ `5/27/20`     <dbl> 0, 192, 281, 130, 71, 53, 176, 391, 152, 340, 33, 93,...
## $ `5/28/20`     <dbl> 0, 205, 281, 132, 71, 58, 185, 392, 152, 349, 34, 97,...
## $ `5/29/20`     <dbl> 0, 212, 282, 147, 71, 60, 201, 396, 153, 352, 36, 99,...
## $ `5/30/20`     <dbl> 0, 216, 283, 150, 72, 61, 203, 402, 154, 353, 37, 100...
## $ `5/31/20`     <dbl> 0, 220, 288, 164, 75, 62, 209, 410, 157, 355, 37, 100...
## $ `6/1/20`      <dbl> 0, 233, 292, 172, 76, 63, 209, 414, 164, 358, 38, 103...
## $ `6/2/20`      <dbl> 0, 238, 292, 175, 76, 63, 212, 416, 165, 358, 38, 104...
## $ `6/3/20`      <dbl> 0, 239, 292, 177, 76, 63, 215, 419, 165, 359, 38, 105...
## $ `6/4/20`      <dbl> 0, 241, 293, 177, 76, 63, 217, 421, 167, 360, 38, 107...
## $ `6/5/20`      <dbl> 0, 248, 296, 183, 76, 64, 219, 431, 169, 363, 38, 108...
## $ `6/6/20`      <dbl> 0, 259, 304, 190, 77, 70, 225, 442, 174, 373, 40, 108...
## $ `6/7/20`      <dbl> 0, 265, 313, 193, 77, 72, 232, 449, 176, 378, 42, 110...
## $ `6/8/20`      <dbl> 0, 272, 320, 197, 79, 73, 238, 455, 178, 383, 42, 111...
## $ `6/9/20`      <dbl> 0, 282, 325, 199, 85, 75, 243, 464, 180, 391, 42, 117...
## $ `6/10/20`     <dbl> 0, 295, 331, 208, 89, 79, 248, 471, 182, 401, 42, 118...
## $ `6/11/20`     <dbl> 0, 312, 343, 214, 93, 87, 253, 484, 184, 417, 42, 121...
## $ `6/12/20`     <dbl> 0, 323, 353, 221, 97, 95, 258, 499, 188, 427, 46, 122...
## $ `6/13/20`     <dbl> 0, 331, 361, 226, 100, 102, 276, 517, 190, 438, 47, 1...
## $ `6/14/20`     <dbl> 0, 357, 364, 234, 104, 110, 302, 536, 195, 453, 51, 1...
## $ `6/15/20`     <dbl> 0, 368, 383, 238, 111, 116, 307, 544, 204, 475, 53, 1...
## $ `6/16/20`     <dbl> 0, 373, 389, 245, 116, 121, 310, 551, 206, 485, 53, 1...
## $ `6/17/20`     <dbl> 0, 375, 392, 251, 118, 123, 313, 554, 208, 486, 53, 1...
## $ `6/18/20`     <dbl> 0, 400, 401, 263, 121, 130, 320, 566, 210, 501, 55, 1...
## $ `6/19/20`     <dbl> 0, 411, 413, 266, 126, 139, 320, 569, 210, 507, 58, 1...
## $ `6/20/20`     <dbl> 0, 431, 420, 272, 126, 143, 327, 572, 211, 516, 58, 1...
## $ `6/21/20`     <dbl> 0, 434, 430, 272, 127, 149, 327, 576, 213, 521, 58, 1...
## $ `6/22/20`     <dbl> 0, 442, 437, 277, 129, 153, 328, 578, 215, 528, 58, 1...
## $ `6/23/20`     <dbl> 0, 453, 450, 280, 135, 159, 329, 581, 216, 534, 58, 1...
## $ `6/24/20`     <dbl> 0, 469, 464, 288, 141, 168, 336, 584, 220, 543, 58, 1...
## $ `6/25/20`     <dbl> 0, 479, 477, 305, 149, 176, 351, 588, 233, 549, 64, 1...
## $ `6/26/20`     <dbl> 0, 488, 515, 312, 153, 184, 351, 594, 236, 559, 68, 1...
## $ `6/27/20`     <dbl> 0, 498, 555, 317, 161, 188, 358, 600, 245, 561, 69, 2...
## $ `6/28/20`     <dbl> 0, 503, 575, 317, 162, 189, 358, 602, 245, 561, 70, 2...
## $ `6/29/20`     <dbl> 0, 527, 643, 322, 165, 199, 365, 605, 269, 585, 73, 2...
## $ `6/30/20`     <dbl> 0, 537, 680, 325, 170, 208, 365, 607, 276, 590, 74, 2...
## $ `7/1/20`      <dbl> 0, 553, 703, 326, 174, 218, 367, 607, 278, 595, 77, 2...
## $ `7/2/20`      <dbl> 0, 561, 751, 335, 179, 222, 369, 610, 288, 611, 82, 2...
## $ `7/3/20`      <dbl> 0, 568, 845, 348, 189, 230, 372, 625, 330, 625, 88, 2...
## $ `7/4/20`      <dbl> 0, 591, 863, 350, 190, 234, 373, 626, 340, 637, 88, 2...
## $ `7/5/20`      <dbl> 0, 615, 881, 352, 193, 239, 373, 634, 362, 642, 100, ...
## $ `7/6/20`      <dbl> 0, 618, 911, 356, 197, 247, 373, 634, 384, 655, 105, ...
## $ `7/7/20`      <dbl> 0, 644, 997, 360, 199, 255, 373, 634, 395, 656, 106, ...
## $ `7/8/20`      <dbl> 0, 651, 1056, 366, 201, 262, 374, 639, 411, 660, 114,...
## $ `7/9/20`      <dbl> 0, 661, 1131, 371, 211, 282, 375, 646, 445, 672, 115,...
## $ `7/10/20`     <dbl> 0, 670, 1187, 381, 218, 292, 381, 648, 465, 679, 118,...
## $ `7/11/20`     <dbl> 0, 684, 1224, 398, 224, 307, 382, 654, 500, 690, 128,...
## $ `7/12/20`     <dbl> 0, 706, 1294, 403, 228, 331, 383, 655, 526, 693, 129,...
## $ `7/13/20`     <dbl> 0, 728, 1359, 413, 231, 350, 383, 660, 566, 702, 136,...
## $ `7/14/20`     <dbl> 0, 746, 1414, 428, 236, 366, 385, 661, 589, 712, 140,...
## $ `7/15/20`     <dbl> 0, 756, 1518, 441, 242, 389, 386, 664, 655, 718, 145,...
## $ `7/16/20`     <dbl> 0, 780, 1599, 459, 247, 424, 389, 669, 675, 731, 152,...
## $ `7/17/20`     <dbl> 0, 789, 1689, 463, 255, 440, 393, 672, 720, 742, 157,...
## $ `7/18/20`     <dbl> 0, 827, 1819, 483, 264, 458, 397, 678, 741, 756, 165,...
## $ `7/19/20`     <dbl> 0, 842, 1937, 495, 269, 482, 398, 686, 785, 762, 173,...
## $ `7/20/20`     <dbl> 0, 857, 2013, 503, 279, 507, 400, 689, 832, 767, 179,...
## $ `7/21/20`     <dbl> 0, 865, 2102, 514, 283, 524, 401, 695, 869, 774, 182,...
## $ `7/22/20`     <dbl> 0, 886, 2196, 518, 287, 547, 407, 701, 891, 782, 184,...
## $ `7/23/20`     <dbl> 0, 905, 2461, 534, 289, 585, 408, 706, 934, 789, 193,...
## $ `7/24/20`     <dbl> 0, 921, 2513, 539, 303, 615, 411, 711, 999, 797, 205,...
## $ `7/25/20`     <dbl> 0, 932, 2662, 552, 318, 637, 414, 720, 1062, 810, 207...
## $ `7/26/20`     <dbl> 0, 942, 2708, 562, 324, 646, 415, 724, 1113, 821, 209...
## $ `7/27/20`     <dbl> 0, 965, 2770, 569, 334, 669, 416, 730, 1194, 825, 220...
## $ `7/28/20`     <dbl> 0, 974, 2835, 575, 337, 675, 429, 734, 1243, 836, 221...
## $ `7/29/20`     <dbl> 0, 974, 2835, 575, 338, 675, 429, 734, 1244, 836, 221...
## $ `7/30/20`     <dbl> 0, 1002, 3028, 585, 352, 731, 435, 747, 1336, 848, 23...
## $ `7/31/20`     <dbl> 0, 1015, 3101, 598, 363, 767, 437, 753, 1450, 859, 23...
## $ `8/1/20`      <dbl> 0, 1030, 3142, 602, 368, 792, 443, 757, 1480, 861, 24...
## $ `8/2/20`      <dbl> 0, 1052, 3223, 610, 372, 813, 445, 765, 1580, 868, 25...
## $ `8/3/20`      <dbl> 0, 1066, 3265, 612, 382, 830, 446, 766, 1612, 875, 26...
## $ `8/4/20`      <dbl> 0, 1073, 3320, 614, 389, 836, 449, 766, 1646, 882, 26...
## $ `8/5/20`      <dbl> 0, 1073, 3380, 615, 392, 839, 452, 769, 1683, 886, 27...
## $ `8/6/20`      <dbl> 0, 1096, 3438, 619, 421, 874, 458, 771, 1741, 893, 28...
## $ `8/7/20`      <dbl> 0, 1113, 3504, 624, 424, 909, 462, 774, 1777, 899, 29...
## $ `8/8/20`      <dbl> 0, 1134, 3564, 628, 434, 923, 471, 773, 1836, 904, 29...
## $ `8/9/20`      <dbl> 0, 1215, 3606, 630, 446, 934, 472, 779, 1860, 906, 30...
## $ `8/10/20`     <dbl> 0, 1215, 3714, 631, 450, 947, 474, 782, 1883, 909, 30...
## $ `8/11/20`     <dbl> 0, 1215, 3736, 643, 455, 958, 489, 785, 1914, 916, 30...
## $ `8/12/20`     <dbl> 0, 1241, 3776, 646, 464, 967, 500, 788, 1935, 918, 31...
## $ `8/13/20`     <dbl> 0, 1250, 3813, 651, 469, 977, 501, 790, 1959, 919, 32...
## $ `8/14/20`     <dbl> 0, 1252, 3860, 656, 477, 989, 502, 796, 1975, 922, 32...
## $ `8/15/20`     <dbl> 0, 1262, 3909, 663, 483, 996, 503, 807, 2019, 925, 33...
## $ `8/16/20`     <dbl> 0, 1273, 3948, 671, 483, 1005, 504, 811, 2037, 927, 3...
## $ `8/17/20`     <dbl> 0, 1274, 3960, 672, 488, 1008, 504, 814, 2055, 928, 3...
## $ `8/18/20`     <dbl> 0, 1291, 3977, 674, 490, 1034, 512, 814, 2107, 937, 3...
## $ `8/19/20`     <dbl> 0, 1293, 4002, 683, 503, 1049, 530, 814, 2159, 941, 3...
## $ `8/20/20`     <dbl> 0, 1293, 4035, 690, 507, 1077, 534, 814, 2214, 949, 3...
## $ `8/21/20`     <dbl> 0, 1293, 4054, 690, 509, 1083, 534, 814, 2228, 952, 3...
## $ `8/22/20`     <dbl> 0, 1322, 4115, 699, 516, 1096, 536, 822, 2276, 957, 3...
## $ `8/23/20`     <dbl> 0, 1324, 4147, 702, 523, 1099, 536, 824, 2286, 958, 3...
## $ `8/24/20`     <dbl> 0, 1351, 4167, 720, 526, 1135, 536, 825, 2327, 971, 3...
## $ `8/25/20`     <dbl> 0, 1355, 4190, 724, 527, 1160, 536, 826, 2345, 973, 3...
## $ `8/26/20`     <dbl> 0, 1366, 4265, 732, 530, 1195, 537, 833, 2400, 983, 3...
## $ `8/27/20`     <dbl> 0, 1377, 4311, 739, 533, 1213, 538, 839, 2413, 1011, ...
## $ `8/28/20`     <dbl> 0, 1389, 4347, 745, 535, 1219, 541, 840, 2443, 1017, ...
## $ `8/29/20`     <dbl> 0, 1400, 4424, 753, 540, 1248, 546, 855, 2499, 1024, ...
## $ `8/30/20`     <dbl> 0, 1438, 4525, 757, 550, 1277, 550, 864, 2533, 1027, ...
## $ `8/31/20`     <dbl> 0, 1442, 4545, 757, 554, 1287, 551, 866, 2567, 1033, ...
## $ `9/1/20`      <dbl> 0, 1453, 4568, 764, 558, 1303, 559, 871, 2619, 1041, ...
## Observations: 715,680
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumCases   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 228
## $ countyFIPS    <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State         <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/8/20`      <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/9/20`      <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/10/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/11/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/12/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/13/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/14/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/15/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/16/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/17/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/18/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/19/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/20/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/21/20`     <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 13, 0, 0, 0, 1, 0, 0, 0, 1...
## $ `4/22/20`     <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 16, 0, 0, 0, 1, 0, 1, 0, 1...
## $ `4/23/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 16, 0, 1, 0, 1, 1, 1, 0, 1...
## $ `4/24/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 17, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/25/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/26/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/27/20`     <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/28/20`     <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 19, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/29/20`     <dbl> 0, 3, 2, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/30/20`     <dbl> 0, 3, 3, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/1/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/2/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/3/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/4/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/5/20`      <dbl> 0, 3, 5, 1, 0, 0, 0, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/6/20`      <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/7/20`      <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/8/20`      <dbl> 0, 3, 5, 1, 1, 0, 1, 3, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/9/20`      <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/10/20`     <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/11/20`     <dbl> 0, 3, 6, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/12/20`     <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 21, 0, 1, 1, 1, 1, 1, 1, 2...
## $ `5/13/20`     <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 22, 0, 1, 2, 1, 1, 1, 1, 2...
## $ `5/14/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 8, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/15/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/16/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/17/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/18/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/19/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/20/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/21/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/22/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/23/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/24/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 12, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/25/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 12, 3, 24, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/26/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/27/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/28/20`     <dbl> 0, 3, 9, 1, 1, 1, 4, 15, 3, 24, 2, 1, 8, 2, 2, 1, 1, ...
## $ `5/29/20`     <dbl> 0, 3, 9, 1, 1, 1, 4, 16, 3, 24, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/30/20`     <dbl> 0, 4, 9, 1, 1, 1, 4, 17, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/31/20`     <dbl> 0, 4, 9, 1, 1, 1, 5, 18, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `6/1/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 25, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/2/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/3/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/4/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/5/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 21, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/6/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/7/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/8/20`      <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/9/20`      <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/10/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/11/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/12/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/13/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/14/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/15/20`     <dbl> 0, 6, 9, 1, 1, 1, 9, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/16/20`     <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/17/20`     <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/18/20`     <dbl> 0, 8, 9, 1, 1, 1, 9, 25, 4, 26, 5, 3, 11, 4, 2, 1, 1,...
## $ `6/19/20`     <dbl> 0, 8, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 11, 4, 2, 1, 1...
## $ `6/20/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/21/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/22/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/23/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 4, 2, 1, 1...
## $ `6/24/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/25/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/26/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/27/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/28/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/29/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/30/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/1/20`      <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/2/20`      <dbl> 0, 13, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/3/20`      <dbl> 0, 13, 10, 2, 1, 1, 10, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/4/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/5/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/6/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/7/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/8/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/9/20`      <dbl> 0, 14, 11, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/10/20`     <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/11/20`     <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/12/20`     <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/13/20`     <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/14/20`     <dbl> 0, 18, 12, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/15/20`     <dbl> 0, 19, 13, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/16/20`     <dbl> 0, 20, 14, 3, 2, 1, 11, 32, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/17/20`     <dbl> 0, 21, 14, 3, 2, 1, 11, 33, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/18/20`     <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/19/20`     <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/20/20`     <dbl> 0, 21, 15, 4, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/21/20`     <dbl> 0, 21, 16, 4, 2, 1, 11, 34, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/22/20`     <dbl> 0, 21, 16, 4, 2, 1, 12, 34, 6, 34, 7, 5, 12, 8, 2, 1,...
## $ `7/23/20`     <dbl> 0, 21, 17, 4, 2, 1, 12, 35, 6, 34, 7, 5, 12, 9, 2, 1,...
## $ `7/24/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/25/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/26/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/27/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/28/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/29/20`     <dbl> 0, 21, 21, 4, 2, 3, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/30/20`     <dbl> 0, 21, 21, 5, 2, 3, 12, 36, 8, 38, 8, 6, 12, 9, 5, 1,...
## $ `7/31/20`     <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/1/20`      <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/2/20`      <dbl> 0, 21, 23, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/3/20`      <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/4/20`      <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 12, 38, 8, 7, 12, 9, 5, 1...
## $ `8/5/20`      <dbl> 0, 22, 24, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/6/20`      <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/7/20`      <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/8/20`      <dbl> 0, 22, 26, 5, 5, 4, 12, 37, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/9/20`      <dbl> 0, 22, 27, 5, 5, 4, 12, 37, 14, 38, 8, 8, 12, 9, 5, 1...
## $ `8/10/20`     <dbl> 0, 22, 28, 5, 5, 4, 12, 37, 17, 38, 9, 9, 12, 10, 5, ...
## $ `8/11/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 19, 38, 9, 12, 12, 10, 5,...
## $ `8/12/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/13/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/14/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/15/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/16/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/17/20`     <dbl> 0, 23, 32, 6, 6, 5, 14, 37, 23, 38, 9, 12, 12, 10, 5,...
## $ `8/18/20`     <dbl> 0, 23, 33, 6, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/19/20`     <dbl> 0, 23, 33, 7, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/20/20`     <dbl> 0, 23, 34, 7, 6, 5, 14, 37, 25, 38, 10, 12, 12, 10, 5...
## $ `8/21/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 5...
## $ `8/22/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/23/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/24/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 27, 38, 10, 13, 12, 11, 6...
## $ `8/25/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 28, 39, 10, 13, 12, 11, 6...
## $ `8/26/20`     <dbl> 0, 23, 36, 7, 6, 7, 14, 37, 28, 39, 10, 13, 12, 12, 6...
## $ `8/27/20`     <dbl> 0, 23, 37, 7, 6, 7, 14, 37, 30, 39, 12, 13, 12, 13, 6...
## $ `8/28/20`     <dbl> 0, 23, 39, 7, 6, 9, 14, 37, 32, 40, 12, 13, 12, 13, 6...
## $ `8/29/20`     <dbl> 0, 23, 40, 7, 7, 9, 14, 37, 35, 40, 12, 13, 12, 14, 6...
## $ `8/30/20`     <dbl> 0, 23, 40, 7, 7, 10, 14, 37, 35, 40, 12, 13, 12, 14, ...
## $ `8/31/20`     <dbl> 0, 23, 42, 7, 8, 11, 14, 37, 36, 40, 12, 13, 12, 14, ...
## $ `9/1/20`      <dbl> 0, 24, 42, 7, 8, 11, 14, 37, 38, 40, 12, 13, 12, 14, ...
## Observations: 715,680
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumDeaths  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
# STEP 3: Explore the cases and deaths by county (can be repeated for other counties)
casesDeathsByCounty(useDate=maxDate_20200903, 
                    inclStates=c("FL", "GA", "SC", "AL", "MS"), 
                    caseData=rawUSAFacts_20200903$cases, 
                    deathData=rawUSAFacts_20200903$deaths,
                    highCaseAmount=80000, 
                    highDeathAmount=2000
                    )

# STEP 4: Create county-level clusters (k-means, 5 clusters, minimum county population 25k)
clust_20200903_new <- prepClusterCounties(burdenFile=burden_20200903_new, 
                                          maxDate=maxDate_20200903, 
                                          minPop=25000, 
                                          hierarchical=FALSE, 
                                          minShape=3,
                                          ratioDeathvsCase = 5,
                                          ratioTotalvsShape = 0.5,
                                          minDeath=100,
                                          minCase=5000,
                                          nCenters=5,
                                          testCenters=1:25,
                                          iter.max=20,
                                          nstart=10,
                                          seed=2009081450
                                          )
## # A tibble: 1 x 6
##   cpm_mean_is0 dpm_mean_is0 cpm_mean_lt100 dpm_mean_lt100 cpm_mean_lt5000
##          <dbl>        <dbl>          <dbl>          <dbl>           <dbl>
## 1            0       0.0352              0          0.266           0.140
## # ... with 1 more variable: dpm_mean_lt5000 <dbl>

## 
## Cluster means and counts
##                  1      2     3      4      5
## .           501.00 473.00 53.00 199.00 365.00
## totalCases    0.69   0.32  1.23   1.14   0.52
## totalDeaths   1.36   0.36  9.67   4.76   1.86
## cases_3       0.01   0.02  0.08   0.02   0.02
## deaths_3      0.05   0.08  0.12   0.05   0.09
## cases_4       0.04   0.06  0.39   0.14   0.17
## deaths_4      0.35   0.54  2.07   0.85   1.45
## cases_5       0.05   0.08  0.17   0.14   0.17
## deaths_5      0.29   0.34  1.29   0.99   1.72
## cases_6       0.13   0.11  0.10   0.15   0.12
## deaths_6      0.36   0.52  0.62   0.70   0.73
## cases_7       0.39   0.27  0.14   0.32   0.24
## deaths_7      1.25   0.69  0.46   1.09   0.45
## cases_8       0.38   0.32  0.12   0.24   0.25
## deaths_8      2.61   0.58  0.43   1.32   0.51
# STEP 5: Extract the clusters from the clustering object
clustVec_county_20200903_new <- clust_20200903_new$objCluster$objCluster$cluster

# STEP 6: Assess the quality of the new clusters
helperACC_county_20200903_new <- helperAssessCountyClusters(clustVec_county_20200903_new, 
                                                            dfPop=clust_20200903_new$countyFiltered, 
                                                            dfBurden=clust_20200903_new$countyFiltered, 
                                                            thruLabel="Sep 3, 2020", 
                                                            plotsTogether=TRUE, 
                                                            orderCluster=TRUE
                                                            )

## 
## Recency is defined as 2020-08-02 through current
## 
## Recency is defined as 2020-08-02 through current

# STEP 7: Create a plot of cumulative burden by cluster
helperACC_county_20200903_new %>%
    select(cluster, date, pop, cases, deaths) %>%
    group_by(cluster, date) %>%
    summarize_if(is.numeric, sum, na.rm=TRUE) %>%
    arrange(date) %>%
    mutate(cpmcum=cumsum(cases)*1000000/pop, dpmcum=cumsum(deaths)*1000000/pop) %>%
    ungroup() %>%
    select(cluster, date, cases=cpmcum, deaths=dpmcum) %>%
    pivot_longer(-c(cluster, date)) %>%
    ggplot(aes(x=date, y=value, color=cluster)) + 
    geom_line(size=1) + 
    geom_text(data=~filter(., date==max(date)), 
              aes(x=date+lubridate::days(2), label=round(value)), 
              size=3, 
              hjust=0
              ) +
    labs(x="", title="Cumulative burden per million people by segment", y="") +
    facet_wrap(~c("cases"="Cases per million", "deaths"="Deaths per million")[name], scales="free_y") + 
    scale_x_date(date_breaks="1 months", date_labels="%b", expand=expand_scale(c(0, 0.1)))

# STEP 8: Add back clusters not used for analysis (code 999) and associated disease data
clusterStateData_20200903_new <- helperMakeClusterStateData(helperACC_county_20200903_new, 
                                                            dfBurden=clust_20200903_new$countyDailyPerCapita,
                                                            orderCluster=TRUE
                                                            )
## Joining, by = "fipsCounty"
## Joining, by = "fipsCounty"
# STEP 9: Run an example state-level summary (can expand to other states)
stateCountySummary(states=c("MN", "ND", "SD", "WI"),
                   df=changeOrderLabel(clusterStateData_20200903_new, grpVars="fipsCounty"),
                   keyDate=maxDate_20200903,
                   showQuadrants=TRUE, 
                   showCumulative=TRUE, 
                   facetCumulativeByState = TRUE, 
                   showAllFactorLevels = TRUE
                   )

# STEP 10a: Read in updated raw data
caseFile_20200917 <- "./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20200917.csv"
deathFile_20200917 <- "./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20200917.csv"
maxDate_20200917 <- "2020-09-15"

rawUSAFacts_20200917 <- readPivotUSAFacts(popFile=popFile, 
                                          caseFile=caseFile_20200917, 
                                          deathFile=deathFile_20200917, 
                                          unassignedDate=maxDate_20200917
                                          )
## Parsed with column specification:
## cols(
##   countyFIPS = col_double(),
##   `County Name` = col_character(),
##   State = col_character(),
##   population = col_double()
## )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Joining, by = c("state", "date", "unassigned")

# STEP 10b: Use existing state-level segments with updated raw data
burden_20200917 <- readUSAFacts(
    caseFile=caseFile_20200917, 
    deathFile=deathFile_20200917,
    stateClusters=test_hier5_201001$useClusters
    )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 242
## $ countyFIPS    <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State         <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20`     <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20`     <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20`     <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20`     <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20`     <dbl> 0, 1, 4, 0, 0, 0, 0, 0, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20`     <dbl> 0, 4, 4, 0, 0, 1, 0, 1, 2, 10, 1, 1, 0, 0, 1, 1, 0, 1...
## $ `3/26/20`     <dbl> 0, 6, 5, 0, 0, 2, 2, 1, 2, 13, 1, 4, 1, 0, 1, 1, 0, 1...
## $ `3/27/20`     <dbl> 0, 6, 5, 0, 0, 5, 2, 1, 3, 15, 1, 7, 1, 0, 1, 3, 0, 1...
## $ `3/28/20`     <dbl> 0, 6, 10, 0, 0, 5, 2, 1, 3, 17, 1, 7, 1, 0, 2, 4, 0, ...
## $ `3/29/20`     <dbl> 0, 6, 15, 0, 0, 5, 2, 1, 3, 27, 2, 8, 1, 0, 2, 5, 0, ...
## $ `3/30/20`     <dbl> 0, 7, 18, 0, 2, 5, 2, 1, 9, 36, 2, 10, 2, 0, 2, 5, 0,...
## $ `3/31/20`     <dbl> 0, 7, 19, 0, 3, 5, 2, 1, 9, 36, 2, 11, 3, 0, 2, 5, 0,...
## $ `4/1/20`      <dbl> 0, 10, 23, 0, 3, 5, 2, 1, 11, 45, 2, 13, 4, 2, 3, 6, ...
## $ `4/2/20`      <dbl> 0, 10, 25, 0, 4, 6, 2, 1, 12, 67, 4, 14, 4, 2, 7, 6, ...
## $ `4/3/20`      <dbl> 0, 12, 28, 1, 4, 9, 2, 1, 20, 81, 5, 15, 4, 3, 8, 7, ...
## $ `4/4/20`      <dbl> 0, 12, 29, 2, 4, 10, 2, 1, 21, 87, 6, 15, 4, 7, 9, 7,...
## $ `4/5/20`      <dbl> 0, 12, 34, 2, 7, 10, 2, 1, 24, 90, 6, 18, 5, 9, 9, 7,...
## $ `4/6/20`      <dbl> 0, 12, 38, 3, 7, 10, 2, 1, 38, 96, 6, 20, 6, 9, 9, 9,...
## $ `4/7/20`      <dbl> 0, 12, 42, 3, 8, 10, 2, 2, 48, 102, 6, 20, 6, 10, 9, ...
## $ `4/8/20`      <dbl> 0, 12, 49, 3, 9, 10, 3, 3, 52, 140, 7, 22, 6, 10, 11,...
## $ `4/9/20`      <dbl> 0, 17, 59, 7, 11, 11, 4, 3, 54, 161, 7, 25, 6, 13, 11...
## $ `4/10/20`     <dbl> 0, 17, 59, 9, 11, 12, 4, 3, 54, 171, 7, 27, 7, 13, 11...
## $ `4/11/20`     <dbl> 0, 19, 66, 10, 13, 12, 4, 6, 57, 184, 7, 30, 9, 15, 1...
## $ `4/12/20`     <dbl> 0, 19, 71, 10, 16, 13, 4, 7, 60, 200, 9, 30, 10, 19, ...
## $ `4/13/20`     <dbl> 0, 19, 78, 10, 17, 15, 6, 8, 61, 212, 9, 33, 10, 19, ...
## $ `4/14/20`     <dbl> 0, 23, 87, 11, 17, 16, 8, 8, 62, 216, 9, 33, 12, 21, ...
## $ `4/15/20`     <dbl> 0, 25, 98, 13, 19, 17, 8, 11, 62, 227, 10, 37, 13, 22...
## $ `4/16/20`     <dbl> 0, 25, 102, 14, 23, 18, 8, 11, 63, 234, 11, 37, 13, 2...
## $ `4/17/20`     <dbl> 0, 25, 103, 15, 23, 20, 8, 13, 63, 236, 12, 37, 13, 2...
## $ `4/18/20`     <dbl> 0, 25, 109, 18, 26, 20, 9, 13, 66, 240, 12, 39, 14, 2...
## $ `4/19/20`     <dbl> 0, 27, 114, 20, 28, 21, 9, 14, 72, 246, 12, 42, 14, 2...
## $ `4/20/20`     <dbl> 0, 28, 117, 22, 32, 22, 11, 14, 80, 257, 12, 43, 17, ...
## $ `4/21/20`     <dbl> 0, 30, 123, 28, 32, 26, 11, 15, 83, 259, 12, 44, 18, ...
## $ `4/22/20`     <dbl> 0, 32, 132, 29, 33, 29, 11, 17, 85, 270, 12, 46, 21, ...
## $ `4/23/20`     <dbl> 0, 33, 143, 30, 33, 31, 12, 19, 88, 275, 12, 47, 22, ...
## $ `4/24/20`     <dbl> 0, 36, 147, 32, 34, 31, 12, 21, 89, 282, 12, 49, 25, ...
## $ `4/25/20`     <dbl> 0, 37, 154, 33, 35, 31, 12, 28, 90, 284, 12, 49, 27, ...
## $ `4/26/20`     <dbl> 0, 37, 161, 33, 38, 34, 12, 32, 90, 285, 14, 51, 32, ...
## $ `4/27/20`     <dbl> 0, 39, 168, 35, 42, 34, 12, 34, 90, 289, 14, 51, 39, ...
## $ `4/28/20`     <dbl> 0, 40, 171, 37, 42, 34, 12, 45, 92, 290, 15, 52, 39, ...
## $ `4/29/20`     <dbl> 0, 42, 173, 37, 42, 36, 12, 51, 93, 290, 15, 52, 39, ...
## $ `4/30/20`     <dbl> 0, 42, 174, 39, 42, 37, 13, 53, 93, 290, 15, 52, 43, ...
## $ `5/1/20`      <dbl> 0, 42, 175, 42, 42, 39, 14, 65, 93, 290, 15, 52, 49, ...
## $ `5/2/20`      <dbl> 0, 45, 181, 43, 42, 40, 14, 92, 98, 294, 15, 54, 49, ...
## $ `5/3/20`      <dbl> 0, 48, 187, 45, 42, 40, 14, 105, 105, 300, 16, 57, 49...
## $ `5/4/20`      <dbl> 0, 53, 188, 45, 42, 40, 16, 114, 105, 302, 16, 58, 51...
## $ `5/5/20`      <dbl> 0, 53, 189, 47, 43, 40, 18, 120, 114, 304, 17, 60, 54...
## $ `5/6/20`      <dbl> 0, 58, 196, 47, 43, 42, 18, 130, 114, 306, 18, 61, 54...
## $ `5/7/20`      <dbl> 0, 61, 205, 51, 44, 44, 18, 155, 120, 308, 18, 63, 56...
## $ `5/8/20`      <dbl> 0, 67, 208, 53, 44, 44, 21, 162, 123, 311, 21, 63, 59...
## $ `5/9/20`      <dbl> 0, 68, 216, 58, 45, 44, 22, 178, 124, 314, 22, 64, 61...
## $ `5/10/20`     <dbl> 0, 74, 222, 59, 46, 44, 23, 189, 124, 316, 22, 65, 66...
## $ `5/11/20`     <dbl> 0, 84, 224, 61, 46, 45, 26, 196, 125, 319, 24, 67, 67...
## $ `5/12/20`     <dbl> 0, 91, 227, 67, 46, 45, 26, 224, 126, 324, 24, 69, 69...
## $ `5/13/20`     <dbl> 0, 93, 231, 69, 46, 45, 28, 230, 127, 324, 24, 73, 72...
## $ `5/14/20`     <dbl> 0, 103, 243, 74, 46, 45, 28, 249, 128, 326, 25, 74, 7...
## $ `5/15/20`     <dbl> 0, 103, 244, 79, 49, 45, 32, 258, 129, 326, 26, 75, 8...
## $ `5/16/20`     <dbl> 0, 110, 254, 79, 50, 45, 35, 271, 130, 328, 27, 77, 8...
## $ `5/17/20`     <dbl> 0, 110, 254, 81, 50, 46, 35, 272, 130, 328, 27, 77, 8...
## $ `5/18/20`     <dbl> 0, 120, 260, 85, 50, 47, 40, 285, 133, 329, 28, 79, 8...
## $ `5/19/20`     <dbl> 0, 127, 262, 90, 51, 47, 52, 295, 133, 329, 29, 80, 9...
## $ `5/20/20`     <dbl> 0, 136, 270, 96, 52, 47, 64, 312, 136, 330, 30, 83, 1...
## $ `5/21/20`     <dbl> 0, 147, 270, 100, 52, 48, 71, 321, 136, 330, 31, 84, ...
## $ `5/22/20`     <dbl> 0, 149, 271, 104, 55, 49, 89, 329, 137, 330, 33, 85, ...
## $ `5/23/20`     <dbl> 0, 155, 273, 105, 58, 49, 105, 335, 138, 330, 33, 86,...
## $ `5/24/20`     <dbl> 0, 159, 274, 110, 59, 49, 111, 344, 141, 336, 33, 87,...
## $ `5/25/20`     <dbl> 0, 173, 276, 116, 62, 49, 141, 368, 147, 337, 33, 87,...
## $ `5/26/20`     <dbl> 0, 189, 277, 122, 66, 51, 167, 380, 150, 338, 33, 90,...
## $ `5/27/20`     <dbl> 0, 192, 281, 130, 71, 53, 176, 391, 152, 340, 33, 93,...
## $ `5/28/20`     <dbl> 0, 205, 281, 132, 71, 58, 185, 392, 152, 349, 34, 97,...
## $ `5/29/20`     <dbl> 0, 212, 282, 147, 71, 60, 201, 396, 153, 352, 36, 99,...
## $ `5/30/20`     <dbl> 0, 216, 283, 150, 72, 61, 203, 402, 154, 353, 37, 100...
## $ `5/31/20`     <dbl> 0, 220, 288, 164, 75, 62, 209, 410, 157, 355, 37, 100...
## $ `6/1/20`      <dbl> 0, 233, 292, 172, 76, 63, 209, 414, 164, 358, 38, 103...
## $ `6/2/20`      <dbl> 0, 238, 292, 175, 76, 63, 212, 416, 165, 358, 38, 104...
## $ `6/3/20`      <dbl> 0, 239, 292, 177, 76, 63, 215, 419, 165, 359, 38, 105...
## $ `6/4/20`      <dbl> 0, 241, 293, 177, 76, 63, 217, 421, 167, 360, 38, 107...
## $ `6/5/20`      <dbl> 0, 248, 296, 183, 76, 64, 219, 431, 169, 363, 38, 108...
## $ `6/6/20`      <dbl> 0, 259, 304, 190, 77, 70, 225, 442, 174, 373, 40, 108...
## $ `6/7/20`      <dbl> 0, 265, 313, 193, 77, 72, 232, 449, 176, 378, 42, 110...
## $ `6/8/20`      <dbl> 0, 272, 320, 197, 79, 73, 238, 455, 178, 383, 42, 111...
## $ `6/9/20`      <dbl> 0, 282, 325, 199, 85, 75, 243, 464, 180, 391, 42, 117...
## $ `6/10/20`     <dbl> 0, 295, 331, 208, 89, 79, 248, 471, 182, 401, 42, 118...
## $ `6/11/20`     <dbl> 0, 312, 343, 214, 93, 87, 253, 484, 184, 417, 42, 121...
## $ `6/12/20`     <dbl> 0, 323, 353, 221, 97, 95, 258, 499, 188, 427, 46, 122...
## $ `6/13/20`     <dbl> 0, 331, 361, 226, 100, 102, 276, 517, 190, 438, 47, 1...
## $ `6/14/20`     <dbl> 0, 357, 364, 234, 104, 110, 302, 536, 195, 453, 51, 1...
## $ `6/15/20`     <dbl> 0, 368, 383, 238, 111, 116, 307, 544, 204, 475, 53, 1...
## $ `6/16/20`     <dbl> 0, 373, 389, 245, 116, 121, 310, 551, 206, 485, 53, 1...
## $ `6/17/20`     <dbl> 0, 375, 392, 251, 118, 123, 313, 554, 208, 486, 53, 1...
## $ `6/18/20`     <dbl> 0, 400, 401, 263, 121, 130, 320, 566, 210, 501, 55, 1...
## $ `6/19/20`     <dbl> 0, 411, 413, 266, 126, 139, 320, 569, 210, 507, 58, 1...
## $ `6/20/20`     <dbl> 0, 431, 420, 272, 126, 143, 327, 572, 211, 516, 58, 1...
## $ `6/21/20`     <dbl> 0, 434, 430, 272, 127, 149, 327, 576, 213, 521, 58, 1...
## $ `6/22/20`     <dbl> 0, 442, 437, 277, 129, 153, 328, 578, 215, 528, 58, 1...
## $ `6/23/20`     <dbl> 0, 453, 450, 280, 135, 159, 329, 581, 216, 534, 58, 1...
## $ `6/24/20`     <dbl> 0, 469, 464, 288, 141, 168, 336, 584, 220, 543, 58, 1...
## $ `6/25/20`     <dbl> 0, 479, 477, 305, 149, 176, 351, 588, 233, 549, 64, 1...
## $ `6/26/20`     <dbl> 0, 488, 515, 312, 153, 184, 351, 594, 236, 559, 68, 1...
## $ `6/27/20`     <dbl> 0, 498, 555, 317, 161, 188, 358, 600, 245, 561, 69, 2...
## $ `6/28/20`     <dbl> 0, 503, 575, 317, 162, 189, 358, 602, 245, 561, 70, 2...
## $ `6/29/20`     <dbl> 0, 527, 643, 322, 165, 199, 365, 605, 269, 585, 73, 2...
## $ `6/30/20`     <dbl> 0, 537, 680, 325, 170, 208, 365, 607, 276, 590, 74, 2...
## $ `7/1/20`      <dbl> 0, 553, 703, 326, 174, 218, 367, 607, 278, 595, 77, 2...
## $ `7/2/20`      <dbl> 0, 561, 751, 335, 179, 222, 369, 610, 288, 611, 82, 2...
## $ `7/3/20`      <dbl> 0, 568, 845, 348, 189, 230, 372, 625, 330, 625, 88, 2...
## $ `7/4/20`      <dbl> 0, 591, 863, 350, 190, 234, 373, 626, 340, 637, 88, 2...
## $ `7/5/20`      <dbl> 0, 615, 881, 352, 193, 239, 373, 634, 362, 642, 100, ...
## $ `7/6/20`      <dbl> 0, 618, 911, 356, 197, 247, 373, 634, 384, 655, 105, ...
## $ `7/7/20`      <dbl> 0, 644, 997, 360, 199, 255, 373, 634, 395, 656, 106, ...
## $ `7/8/20`      <dbl> 0, 651, 1056, 366, 201, 262, 374, 639, 411, 660, 114,...
## $ `7/9/20`      <dbl> 0, 661, 1131, 371, 211, 282, 375, 646, 445, 672, 115,...
## $ `7/10/20`     <dbl> 0, 670, 1187, 381, 218, 292, 381, 648, 465, 679, 118,...
## $ `7/11/20`     <dbl> 0, 684, 1224, 398, 224, 307, 382, 654, 500, 690, 128,...
## $ `7/12/20`     <dbl> 0, 706, 1294, 403, 228, 331, 383, 655, 526, 693, 129,...
## $ `7/13/20`     <dbl> 0, 728, 1359, 413, 231, 350, 383, 660, 566, 702, 136,...
## $ `7/14/20`     <dbl> 0, 746, 1414, 428, 236, 366, 385, 661, 589, 712, 140,...
## $ `7/15/20`     <dbl> 0, 756, 1518, 441, 242, 389, 386, 664, 655, 718, 145,...
## $ `7/16/20`     <dbl> 0, 780, 1599, 459, 247, 424, 389, 669, 675, 731, 152,...
## $ `7/17/20`     <dbl> 0, 789, 1689, 463, 255, 440, 393, 672, 720, 742, 157,...
## $ `7/18/20`     <dbl> 0, 827, 1819, 483, 264, 458, 397, 678, 741, 756, 165,...
## $ `7/19/20`     <dbl> 0, 842, 1937, 495, 269, 482, 398, 686, 785, 762, 173,...
## $ `7/20/20`     <dbl> 0, 857, 2013, 503, 279, 507, 400, 689, 832, 767, 179,...
## $ `7/21/20`     <dbl> 0, 865, 2102, 514, 283, 524, 401, 695, 869, 774, 182,...
## $ `7/22/20`     <dbl> 0, 886, 2196, 518, 287, 547, 407, 701, 891, 782, 184,...
## $ `7/23/20`     <dbl> 0, 905, 2461, 534, 289, 585, 408, 706, 934, 789, 193,...
## $ `7/24/20`     <dbl> 0, 921, 2513, 539, 303, 615, 411, 711, 999, 797, 205,...
## $ `7/25/20`     <dbl> 0, 932, 2662, 552, 318, 637, 414, 720, 1062, 810, 207...
## $ `7/26/20`     <dbl> 0, 942, 2708, 562, 324, 646, 415, 724, 1113, 821, 209...
## $ `7/27/20`     <dbl> 0, 965, 2770, 569, 334, 669, 416, 730, 1194, 825, 220...
## $ `7/28/20`     <dbl> 0, 974, 2835, 575, 337, 675, 429, 734, 1243, 836, 221...
## $ `7/29/20`     <dbl> 0, 974, 2835, 575, 338, 675, 429, 734, 1244, 836, 221...
## $ `7/30/20`     <dbl> 0, 1002, 3028, 585, 352, 731, 435, 747, 1336, 848, 23...
## $ `7/31/20`     <dbl> 0, 1015, 3101, 598, 363, 767, 437, 753, 1450, 859, 23...
## $ `8/1/20`      <dbl> 0, 1030, 3142, 602, 368, 792, 443, 757, 1480, 861, 24...
## $ `8/2/20`      <dbl> 0, 1052, 3223, 610, 372, 813, 445, 765, 1580, 868, 25...
## $ `8/3/20`      <dbl> 0, 1066, 3265, 612, 382, 830, 446, 766, 1612, 875, 26...
## $ `8/4/20`      <dbl> 0, 1073, 3320, 614, 389, 836, 449, 766, 1646, 882, 26...
## $ `8/5/20`      <dbl> 0, 1073, 3380, 615, 392, 839, 452, 769, 1683, 886, 27...
## $ `8/6/20`      <dbl> 0, 1096, 3438, 619, 421, 874, 458, 771, 1741, 893, 28...
## $ `8/7/20`      <dbl> 0, 1113, 3504, 624, 424, 909, 462, 774, 1777, 899, 29...
## $ `8/8/20`      <dbl> 0, 1134, 3564, 628, 434, 923, 471, 773, 1836, 904, 29...
## $ `8/9/20`      <dbl> 0, 1215, 3606, 630, 446, 934, 472, 779, 1860, 906, 30...
## $ `8/10/20`     <dbl> 0, 1215, 3714, 631, 450, 947, 474, 782, 1883, 909, 30...
## $ `8/11/20`     <dbl> 0, 1215, 3736, 643, 455, 958, 489, 785, 1914, 916, 30...
## $ `8/12/20`     <dbl> 0, 1241, 3776, 646, 464, 967, 500, 788, 1935, 918, 31...
## $ `8/13/20`     <dbl> 0, 1250, 3813, 651, 469, 977, 501, 790, 1959, 919, 32...
## $ `8/14/20`     <dbl> 0, 1252, 3860, 656, 477, 989, 502, 796, 1975, 922, 32...
## $ `8/15/20`     <dbl> 0, 1262, 3909, 663, 483, 996, 503, 807, 2019, 925, 33...
## $ `8/16/20`     <dbl> 0, 1273, 3948, 671, 483, 1005, 504, 811, 2037, 927, 3...
## $ `8/17/20`     <dbl> 0, 1274, 3960, 672, 488, 1008, 504, 814, 2055, 928, 3...
## $ `8/18/20`     <dbl> 0, 1291, 3977, 674, 490, 1034, 512, 814, 2107, 937, 3...
## $ `8/19/20`     <dbl> 0, 1293, 4002, 683, 503, 1049, 530, 814, 2159, 941, 3...
## $ `8/20/20`     <dbl> 0, 1293, 4035, 690, 507, 1077, 534, 814, 2214, 949, 3...
## $ `8/21/20`     <dbl> 0, 1293, 4054, 690, 509, 1083, 534, 814, 2228, 952, 3...
## $ `8/22/20`     <dbl> 0, 1322, 4115, 699, 516, 1096, 536, 822, 2276, 957, 3...
## $ `8/23/20`     <dbl> 0, 1324, 4147, 702, 523, 1099, 536, 824, 2286, 958, 3...
## $ `8/24/20`     <dbl> 0, 1351, 4167, 720, 526, 1135, 536, 825, 2327, 971, 3...
## $ `8/25/20`     <dbl> 0, 1355, 4190, 724, 527, 1160, 536, 826, 2345, 973, 3...
## $ `8/26/20`     <dbl> 0, 1366, 4265, 732, 530, 1195, 537, 833, 2400, 983, 3...
## $ `8/27/20`     <dbl> 0, 1377, 4311, 739, 533, 1213, 538, 839, 2413, 1011, ...
## $ `8/28/20`     <dbl> 0, 1389, 4347, 745, 535, 1219, 541, 840, 2443, 1017, ...
## $ `8/29/20`     <dbl> 0, 1400, 4424, 753, 540, 1248, 546, 855, 2499, 1024, ...
## $ `8/30/20`     <dbl> 0, 1438, 4525, 757, 550, 1277, 550, 864, 2533, 1027, ...
## $ `8/31/20`     <dbl> 0, 1442, 4545, 757, 554, 1287, 551, 866, 2567, 1033, ...
## $ `9/1/20`      <dbl> 0, 1452, 4568, 764, 558, 1303, 559, 871, 2619, 1041, ...
## $ `9/2/20`      <dbl> 0, 1452, 4583, 768, 562, 1308, 561, 872, 2633, 1045, ...
## $ `9/3/20`      <dbl> 0, 1466, 4628, 771, 564, 1336, 563, 874, 2678, 1046, ...
## $ `9/4/20`      <dbl> 0, 1475, 4654, 776, 570, 1361, 563, 881, 2747, 1054, ...
## $ `9/5/20`      <dbl> 0, 1492, 4686, 776, 576, 1376, 566, 886, 2830, 1059, ...
## $ `9/6/20`      <dbl> 0, 1498, 4713, 777, 581, 1379, 568, 890, 2842, 1061, ...
## $ `9/7/20`      <dbl> 0, 1504, 4730, 778, 583, 1384, 568, 892, 2877, 1063, ...
## $ `9/8/20`      <dbl> 0, 1508, 4757, 778, 589, 1390, 568, 892, 2891, 1064, ...
## $ `9/9/20`      <dbl> 0, 1522, 4787, 778, 591, 1401, 570, 895, 2907, 1068, ...
## $ `9/10/20`     <dbl> 0, 1544, 4833, 785, 594, 1430, 572, 896, 2958, 1076, ...
## $ `9/11/20`     <dbl> 0, 1551, 4886, 786, 602, 1441, 573, 896, 2988, 1088, ...
## $ `9/12/20`     <dbl> 0, 1565, 4922, 792, 604, 1446, 574, 898, 3047, 1094, ...
## $ `9/13/20`     <dbl> 0, 1576, 4959, 794, 607, 1453, 580, 899, 3093, 1094, ...
## $ `9/14/20`     <dbl> 0, 1585, 4978, 801, 610, 1464, 580, 900, 3110, 1097, ...
## $ `9/15/20`     <dbl> 0, 1601, 4992, 806, 611, 1475, 581, 901, 3127, 1102, ...
## Observations: 760,410
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumCases   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 242
## $ countyFIPS    <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State         <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/8/20`      <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/9/20`      <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/10/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/11/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/12/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/13/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/14/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/15/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/16/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/17/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/18/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/19/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/20/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/21/20`     <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 13, 0, 0, 0, 1, 0, 0, 0, 1...
## $ `4/22/20`     <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 16, 0, 0, 0, 1, 0, 1, 0, 1...
## $ `4/23/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 16, 0, 1, 0, 1, 1, 1, 0, 1...
## $ `4/24/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 17, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/25/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/26/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/27/20`     <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/28/20`     <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 19, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/29/20`     <dbl> 0, 3, 2, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/30/20`     <dbl> 0, 3, 3, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/1/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/2/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/3/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/4/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/5/20`      <dbl> 0, 3, 5, 1, 0, 0, 0, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/6/20`      <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/7/20`      <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/8/20`      <dbl> 0, 3, 5, 1, 1, 0, 1, 3, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/9/20`      <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/10/20`     <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/11/20`     <dbl> 0, 3, 6, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/12/20`     <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 21, 0, 1, 1, 1, 1, 1, 1, 2...
## $ `5/13/20`     <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 22, 0, 1, 2, 1, 1, 1, 1, 2...
## $ `5/14/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 8, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/15/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/16/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/17/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/18/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/19/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/20/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/21/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/22/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/23/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/24/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 12, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/25/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 12, 3, 24, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/26/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/27/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/28/20`     <dbl> 0, 3, 9, 1, 1, 1, 4, 15, 3, 24, 2, 1, 8, 2, 2, 1, 1, ...
## $ `5/29/20`     <dbl> 0, 3, 9, 1, 1, 1, 4, 16, 3, 24, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/30/20`     <dbl> 0, 4, 9, 1, 1, 1, 4, 17, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/31/20`     <dbl> 0, 4, 9, 1, 1, 1, 5, 18, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `6/1/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 25, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/2/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/3/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/4/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/5/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 21, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/6/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/7/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/8/20`      <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/9/20`      <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/10/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/11/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/12/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/13/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/14/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/15/20`     <dbl> 0, 6, 9, 1, 1, 1, 9, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/16/20`     <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/17/20`     <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/18/20`     <dbl> 0, 8, 9, 1, 1, 1, 9, 25, 4, 26, 5, 3, 11, 4, 2, 1, 1,...
## $ `6/19/20`     <dbl> 0, 8, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 11, 4, 2, 1, 1...
## $ `6/20/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/21/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/22/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/23/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 4, 2, 1, 1...
## $ `6/24/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/25/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/26/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/27/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/28/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/29/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/30/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/1/20`      <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/2/20`      <dbl> 0, 13, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/3/20`      <dbl> 0, 13, 10, 2, 1, 1, 10, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/4/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/5/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/6/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/7/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/8/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/9/20`      <dbl> 0, 14, 11, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/10/20`     <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/11/20`     <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/12/20`     <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/13/20`     <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/14/20`     <dbl> 0, 18, 12, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/15/20`     <dbl> 0, 19, 13, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/16/20`     <dbl> 0, 20, 14, 3, 2, 1, 11, 32, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/17/20`     <dbl> 0, 21, 14, 3, 2, 1, 11, 33, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/18/20`     <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/19/20`     <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/20/20`     <dbl> 0, 21, 15, 4, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/21/20`     <dbl> 0, 21, 16, 4, 2, 1, 11, 34, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/22/20`     <dbl> 0, 21, 16, 4, 2, 1, 12, 34, 6, 34, 7, 5, 12, 8, 2, 1,...
## $ `7/23/20`     <dbl> 0, 21, 17, 4, 2, 1, 12, 35, 6, 34, 7, 5, 12, 9, 2, 1,...
## $ `7/24/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/25/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/26/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/27/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/28/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/29/20`     <dbl> 0, 21, 21, 4, 2, 3, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/30/20`     <dbl> 0, 21, 21, 5, 2, 3, 12, 36, 8, 38, 8, 6, 12, 9, 5, 1,...
## $ `7/31/20`     <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/1/20`      <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/2/20`      <dbl> 0, 21, 23, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/3/20`      <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/4/20`      <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 12, 38, 8, 7, 12, 9, 5, 1...
## $ `8/5/20`      <dbl> 0, 22, 24, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/6/20`      <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/7/20`      <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/8/20`      <dbl> 0, 22, 26, 5, 5, 4, 12, 37, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/9/20`      <dbl> 0, 22, 27, 5, 5, 4, 12, 37, 14, 38, 8, 8, 12, 9, 5, 1...
## $ `8/10/20`     <dbl> 0, 22, 28, 5, 5, 4, 12, 37, 17, 38, 9, 9, 12, 10, 5, ...
## $ `8/11/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 19, 38, 9, 12, 12, 10, 5,...
## $ `8/12/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/13/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/14/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/15/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/16/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/17/20`     <dbl> 0, 23, 32, 6, 6, 5, 14, 37, 23, 38, 9, 12, 12, 10, 5,...
## $ `8/18/20`     <dbl> 0, 23, 33, 6, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/19/20`     <dbl> 0, 23, 33, 7, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/20/20`     <dbl> 0, 23, 34, 7, 6, 5, 14, 37, 25, 38, 10, 12, 12, 10, 5...
## $ `8/21/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 5...
## $ `8/22/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/23/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/24/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 27, 38, 10, 13, 12, 11, 6...
## $ `8/25/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 28, 39, 10, 13, 12, 11, 6...
## $ `8/26/20`     <dbl> 0, 23, 36, 7, 6, 7, 14, 37, 28, 39, 10, 13, 12, 12, 6...
## $ `8/27/20`     <dbl> 0, 23, 37, 7, 6, 7, 14, 37, 30, 39, 12, 13, 12, 13, 6...
## $ `8/28/20`     <dbl> 0, 23, 39, 7, 6, 9, 14, 37, 32, 40, 12, 13, 12, 13, 6...
## $ `8/29/20`     <dbl> 0, 23, 40, 7, 7, 9, 14, 37, 35, 40, 12, 13, 12, 14, 6...
## $ `8/30/20`     <dbl> 0, 23, 40, 7, 7, 10, 14, 37, 35, 40, 12, 13, 12, 14, ...
## $ `8/31/20`     <dbl> 0, 23, 42, 7, 8, 11, 14, 37, 36, 40, 12, 13, 12, 14, ...
## $ `9/1/20`      <dbl> 0, 24, 42, 7, 8, 11, 14, 37, 38, 40, 12, 13, 12, 14, ...
## $ `9/2/20`      <dbl> 0, 24, 42, 7, 8, 11, 14, 37, 38, 40, 12, 14, 12, 14, ...
## $ `9/3/20`      <dbl> 0, 24, 44, 7, 8, 11, 14, 37, 38, 40, 12, 14, 12, 14, ...
## $ `9/4/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/5/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/6/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/7/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/8/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/9/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/10/20`     <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 13, 16, 12, 14, ...
## $ `9/11/20`     <dbl> 0, 24, 47, 7, 9, 12, 14, 38, 38, 41, 14, 17, 12, 15, ...
## $ `9/12/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 14, 18, 12, 16, ...
## $ `9/13/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 14, 18, 12, 16, ...
## $ `9/14/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 14, 18, 12, 16, ...
## $ `9/15/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 15, 24, 12, 16, ...
## Observations: 760,410
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumDeaths  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
# STEP 11: Compare burden in old data and new data
bind_rows(burden_20200903_new, burden_20200917, .id="source") %>%
    mutate(source=factor(case_when(source==1 ~ "2020-09-03", source==2 ~ "2020-09-17", TRUE ~ "Unknown"), 
                         levels=c("2020-09-17", "2020-09-03", "Unknown")
                         )
           ) %>%
    group_by(source, date) %>%
    summarize(cumDeaths=sum(cumDeaths), cumCases=sum(cumCases)) %>%
    pivot_longer(-c(source, date)) %>%
    ggplot(aes(x=date, y=value/1000, group=source, color=source)) + 
    geom_line() + 
    facet_wrap(~c("cumCases"="Cases", "cumDeaths"="Deaths")[name], scales="free_y") + 
    scale_x_date(date_breaks="1 months", date_labels="%m") + 
    labs(y="Burden (000s)", title="US National Coronavirus Burden by Source")

# STEP 12: Show top-level findings by segment
plotBurdenData(burden_20200917, maxDate=maxDate_20200917, minPop=10000)

# STEP 13: Create county-level clusters (k-means, 5 clusters, minimum county population 25k)
clust_20200917_new <- prepClusterCounties(burdenFile=burden_20200917, 
                                          maxDate=maxDate_20200917, 
                                          minPop=25000, 
                                          hierarchical=FALSE, 
                                          minShape=3,
                                          ratioDeathvsCase = 5,
                                          ratioTotalvsShape = 0.5,
                                          minDeath=100,
                                          minCase=5000,
                                          nCenters=5,
                                          testCenters=1:25,
                                          iter.max=20,
                                          nstart=10,
                                          seed=2009081450
                                          )
## # A tibble: 1 x 6
##   cpm_mean_is0 dpm_mean_is0 cpm_mean_lt100 dpm_mean_lt100 cpm_mean_lt5000
##          <dbl>        <dbl>          <dbl>          <dbl>           <dbl>
## 1            0       0.0283              0          0.224           0.104
## # ... with 1 more variable: dpm_mean_lt5000 <dbl>

## 
## Cluster means and counts
##                  1      2     3      4      5
## .           503.00 484.00 73.00 219.00 312.00
## totalCases    0.77   0.39  1.40   1.14   0.56
## totalDeaths   1.56   0.46  9.47   4.69   1.97
## cases_3       0.01   0.02  0.06   0.01   0.02
## deaths_3      0.04   0.08  0.09   0.05   0.09
## cases_4       0.04   0.06  0.29   0.12   0.17
## deaths_4      0.32   0.53  1.61   0.76   1.46
## cases_5       0.05   0.07  0.15   0.12   0.16
## deaths_5      0.26   0.35  1.05   0.96   1.72
## cases_6       0.11   0.10  0.10   0.13   0.10
## deaths_6      0.33   0.51  0.58   0.63   0.66
## cases_7       0.34   0.23  0.20   0.29   0.20
## deaths_7      1.05   0.64  0.63   0.96   0.40
## cases_8       0.32   0.26  0.16   0.23   0.21
## deaths_8      2.09   0.52  0.78   1.22   0.42
## cases_9       0.14   0.16  0.05   0.08   0.11
## deaths_9      0.87   0.55  0.26   0.43   0.22
# STEP 14: Assess the previous clusters on the updated data
helper_test_20200917 <- helperAssessCountyClusters(vecCluster=clustVec_county_20200903_new, 
                                                   dfPop=clust_20200917_new$countyFiltered, 
                                                   dfBurden=clust_20200917_new$countyFiltered, 
                                                   thruLabel="Sep 15, 2020", 
                                                   plotsTogether=TRUE, 
                                                   showMap=TRUE, 
                                                   clusterPlotsTogether=TRUE, 
                                                   orderCluster=TRUE
                                                   )

## 
## Recency is defined as 2020-08-17 through current
## 
## Recency is defined as 2020-08-17 through current
## Warning: Removed 1 rows containing missing values (geom_point).

# STEP 15: Create normalized data
cNorm_20200917 <- helperMakeNormData(helper_test_20200917)

# STEP 16: Assess lags for early pandemic and late pandemic
lagData_early_20200917 <- helperTestLags(cNorm_20200917, minDate="2020-03-01", maxDate="2020-05-31")

## 
## The best lags are:
## # A tibble: 5 x 3
##   cluster  corr   lag
##   <fct>   <dbl> <int>
## 1 5       0.971     2
## 2 2       0.809     4
## 3 1       0.937     5
## 4 4       0.972     7
## 5 3       0.989     8

lagData_late_20200917 <- helperTestLags(cNorm_20200917, minDate="2020-06-01", maxDate="2020-09-15")

## 
## The best lags are:
## # A tibble: 5 x 3
##   cluster  corr   lag
##   <fct>   <dbl> <int>
## 1 2       0.781     6
## 2 4       0.886    16
## 3 1       0.967    19
## 4 5       0.435    23
## 5 3       0.472    26

# STEP 17: Generate a list of key counties
keyCounties_20200917 <- helper_test_20200917 %>%
    mutate(state=str_pad(state, width=5, side="left", pad="0")) %>%
    filter(pop >= 100000) %>%
    group_by(state, cluster) %>%
    summarize(dpm=sum(dpm), pop=mean(pop)) %>%
    group_by(cluster) %>%
    top_n(n=3, wt=dpm) %>%
    ungroup() %>%
    arrange(cluster, -dpm) %>%
    inner_join(select(usmap::countypop, -pop_2015), by=c("state"="fips")) %>%
    mutate(countyName=paste0(cluster, " - ", 
                             stringr::str_replace(county, "County|Parish", "("), 
                             abbr, 
                             ")"
                             )
           ) %>%
    select(-abbr, -county)

# STEP 18: Keep only key counties
cNorm_keyCounties_20200917 <- helper_test_20200917 %>%
    mutate(state=str_pad(state, width=5, side="left", pad="0")) %>%
    inner_join(select(keyCounties_20200917, state, countyName), by=c("state"="state")) %>%
    helperMakeNormData(aggBy=c("countyName", "state", "cluster"))

# STEP 19: Create early and late lags for key counties
lagData_early_keycounties_20200917 <- helperTestLags(cNorm_keyCounties_20200917, 
                                                     minDate="2020-03-01", 
                                                     maxDate="2020-05-31", 
                                                     aggBy=c("countyName", "state", "cluster"), 
                                                     maxRatio=0.25
                                                     )

## 
## The best lags are:
## # A tibble: 15 x 5
##    countyName          state cluster  corr   lag
##    <chr>               <chr> <fct>   <dbl> <int>
##  1 1 - Anderson (SC)   45007 1       0.717     0
##  2 1 - Webb (TX)       48479 1       0.642     0
##  3 2 - Douglas (NE)    31055 2       0.517     0
##  4 2 - Pulaski (AR)    05119 2       0.644     0
##  5 1 - Highlands (FL)  12055 1       0.772     1
##  6 5 - Litchfield (CT) 09005 5       0.894     4
##  7 2 - DeKalb (IL)     17037 2       0.654     5
##  8 3 - Bronx (NY)      36005 3       0.962     6
##  9 3 - Kings (NY)      36047 3       0.945     8
## 10 3 - Queens (NY)     36081 3       0.943     8
## 11 4 - Yuma (AZ)       04027 4       0.956    11
## 12 5 - Gloucester (NJ) 34015 5       0.874    12
## 13 4 - Caddo (LA)      22017 4       0.785    13
## 14 4 - Hidalgo (TX)    48215 4       0.632    17
## 15 5 - Barnstable (MA) 25001 5       0.679    21
## Warning: Removed 136 rows containing missing values (geom_path).
## Warning: Removed 136 rows containing missing values (geom_path).

lagData_late_keycounties_20200917 <- helperTestLags(cNorm_keyCounties_20200917, 
                                           minDate="2020-06-01", 
                                           maxDate="2020-09-15", 
                                           aggBy=c("countyName", "state", "cluster"), 
                                           maxRatio=0.25
                                           )

## 
## The best lags are:
## # A tibble: 15 x 5
##    countyName          state cluster   corr   lag
##    <chr>               <chr> <fct>    <dbl> <int>
##  1 5 - Litchfield (CT) 09005 5        0.179     6
##  2 1 - Highlands (FL)  12055 1        0.689    13
##  3 4 - Hidalgo (TX)    48215 4        0.758    16
##  4 2 - DeKalb (IL)     17037 2        0.130    19
##  5 2 - Douglas (NE)    31055 2        0.152    19
##  6 4 - Yuma (AZ)       04027 4        0.780    19
##  7 4 - Caddo (LA)      22017 4        0.312    24
##  8 2 - Pulaski (AR)    05119 2        0.436    25
##  9 1 - Anderson (SC)   45007 1        0.838    26
## 10 3 - Bronx (NY)      36005 3        0.755    26
## 11 3 - Kings (NY)      36047 3        0.929    26
## 12 3 - Queens (NY)     36081 3        0.734    26
## 13 5 - Gloucester (NJ) 34015 5       -0.412    29
## 14 1 - Webb (TX)       48479 1        0.738    30
## 15 5 - Barnstable (MA) 25001 5        0.560    30

# STEP 20: Run for only key counties
exploreTopCounties(helper_test_20200917, minDate="2020-03-01", maxDate="2020-05-31", nVar="pop", nKey=12)
## 
## *** KEY COUNTIES INCLUDE: ***
## # A tibble: 12 x 5
##    state cluster countyName             dpm      pop
##    <chr> <fct>   <chr>                <dbl>    <dbl>
##  1 06037 5       5 - Los Angeles (CA)  625. 10039107
##  2 17031 4       4 - Cook (IL)         997.  5150233
##  3 48201 1       1 - Harris (TX)       518.  4713325
##  4 04013 4       4 - Maricopa (AZ)     707.  4485414
##  5 06073 2       2 - San Diego (CA)    222.  3338330
##  6 06059 1       1 - Orange (CA)       348.  3175692
##  7 12086 4       4 - Miami-Dade (FL)  1076.  2716940
##  8 48113 1       1 - Dallas (TX)       398.  2635516
##  9 36047 3       3 - Kings (NY)       2854.  2559903
## 10 06065 1       1 - Riverside (CA)    455.  2470546
## 11 32003 1       1 - Clark (NV)        568.  2266715
## 12 36081 3       3 - Queens (NY)      3208.  2253858

## 
## The best lags are:
## # A tibble: 12 x 5
##    countyName           state cluster  corr   lag
##    <chr>                <chr> <fct>   <dbl> <int>
##  1 1 - Clark (NV)       32003 1       0.799     0
##  2 1 - Dallas (TX)      48113 1       0.816     0
##  3 1 - Harris (TX)      48201 1       0.694     0
##  4 1 - Orange (CA)      06059 1       0.837     1
##  5 2 - San Diego (CA)   06073 2       0.861     1
##  6 4 - Cook (IL)        17031 4       0.972     4
##  7 4 - Maricopa (AZ)    04013 4       0.931     4
##  8 5 - Los Angeles (CA) 06037 5       0.874     4
##  9 3 - Kings (NY)       36047 3       0.945     8
## 10 3 - Queens (NY)      36081 3       0.943     8
## 11 1 - Riverside (CA)   06065 1       0.952    12
## 12 4 - Miami-Dade (FL)  12086 4       0.910    12
## Warning: Removed 23 rows containing missing values (geom_path).
## Warning: Removed 23 rows containing missing values (geom_path).

## # A tibble: 1,104 x 9
## # Groups:   countyName, state, cluster [12]
##    countyName         state cluster date         cpm7  dpm7  corr   lag cpmlag
##    <chr>              <chr> <fct>   <date>      <dbl> <dbl> <dbl> <int>  <dbl>
##  1 1 - Clark (NV)     32003 1       2020-03-01 0          0 0.799     0      0
##  2 1 - Dallas (TX)    48113 1       2020-03-01 0          0 0.816     0      0
##  3 1 - Harris (TX)    48201 1       2020-03-01 0          0 0.694     0      0
##  4 1 - Orange (CA)    06059 1       2020-03-01 0.0900     0 0.837     1     NA
##  5 1 - Riverside (CA) 06065 1       2020-03-01 0          0 0.952    12     NA
##  6 2 - San Diego (CA) 06073 2       2020-03-01 0          0 0.861     1     NA
##  7 3 - Kings (NY)     36047 3       2020-03-01 0          0 0.945     8     NA
##  8 3 - Queens (NY)    36081 3       2020-03-01 0          0 0.943     8     NA
##  9 4 - Cook (IL)      17031 4       2020-03-01 0.0555     0 0.972     4     NA
## 10 4 - Maricopa (AZ)  04013 4       2020-03-01 0          0 0.931     4     NA
## # ... with 1,094 more rows
exploreTopCounties(helper_test_20200917, minDate="2020-06-15", maxDate="2020-09-15", nVar="pop", nKey=12)
## 
## *** KEY COUNTIES INCLUDE: ***
## # A tibble: 12 x 5
##    state cluster countyName             dpm      pop
##    <chr> <fct>   <chr>                <dbl>    <dbl>
##  1 06037 5       5 - Los Angeles (CA)  625. 10039107
##  2 17031 4       4 - Cook (IL)         997.  5150233
##  3 48201 1       1 - Harris (TX)       518.  4713325
##  4 04013 4       4 - Maricopa (AZ)     707.  4485414
##  5 06073 2       2 - San Diego (CA)    222.  3338330
##  6 06059 1       1 - Orange (CA)       348.  3175692
##  7 12086 4       4 - Miami-Dade (FL)  1076.  2716940
##  8 48113 1       1 - Dallas (TX)       398.  2635516
##  9 36047 3       3 - Kings (NY)       2854.  2559903
## 10 06065 1       1 - Riverside (CA)    455.  2470546
## 11 32003 1       1 - Clark (NV)        568.  2266715
## 12 36081 3       3 - Queens (NY)      3208.  2253858

## 
## The best lags are:
## # A tibble: 12 x 5
##    countyName           state cluster   corr   lag
##    <chr>                <chr> <fct>    <dbl> <int>
##  1 2 - San Diego (CA)   06073 2        0.771     5
##  2 3 - Kings (NY)       36047 3        0.773    12
##  3 5 - Los Angeles (CA) 06037 5        0.740    12
##  4 1 - Harris (TX)      48201 1        0.661    13
##  5 1 - Riverside (CA)   06065 1        0.505    16
##  6 4 - Maricopa (AZ)    04013 4        0.934    18
##  7 4 - Miami-Dade (FL)  12086 4        0.469    19
##  8 1 - Clark (NV)       32003 1        0.814    20
##  9 1 - Orange (CA)      06059 1        0.388    21
## 10 1 - Dallas (TX)      48113 1        0.609    30
## 11 3 - Queens (NY)      36081 3        0.768    30
## 12 4 - Cook (IL)        17031 4       -0.526    30

## # A tibble: 1,080 x 9
## # Groups:   countyName, state, cluster [12]
##    countyName         state cluster date        cpm7  dpm7   corr   lag cpmlag
##    <chr>              <chr> <fct>   <date>     <dbl> <dbl>  <dbl> <int>  <dbl>
##  1 1 - Clark (NV)     32003 1       2020-06-15  90.4 0.756  0.814    20     NA
##  2 1 - Dallas (TX)    48113 1       2020-06-15 125.  1.52   0.609    30     NA
##  3 1 - Harris (TX)    48201 1       2020-06-15  79.0 0.940  0.661    13     NA
##  4 1 - Orange (CA)    06059 1       2020-06-15  58.7 2.16   0.388    21     NA
##  5 1 - Riverside (CA) 06065 1       2020-06-15 129.  1.79   0.505    16     NA
##  6 2 - San Diego (CA) 06073 2       2020-06-15  46.8 0.984  0.771     5     NA
##  7 3 - Kings (NY)     36047 3       2020-06-15  50.1 4.41   0.773    12     NA
##  8 3 - Queens (NY)    36081 3       2020-06-15  53.0 3.74   0.768    30     NA
##  9 4 - Cook (IL)      17031 4       2020-06-15  61.1 6.16  -0.526    30     NA
## 10 4 - Maricopa (AZ)  04013 4       2020-06-15 250.  2.10   0.934    18     NA
## # ... with 1,070 more rows

The process is further converted to functional form as follows:

  1. Capability to either start the process from scratch or to leverage existing clusters at the state or county level
  2. Add the capability to compare a newly read file to an existing file
  3. Add the capability to show burden by existing segment after reading in an existing file
  4. Add the capability to prepare the data (create countyFiltered) without running the clustering proces
  5. Add the cumulative burden chart to helperAssessCountyClusters

New data are downloaed from 2020-10-03, and the updated code includes:

# STEP 1a: Define the locations for the population, cases, and deaths file
popFile <- "./RInputFiles/Coronavirus/covid_county_population_usafacts.csv"
caseFile_20201003 <- "./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20201003.csv"
deathFile_20201003 <- "./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20201003.csv"
maxDate_20201003 <- "2020-09-30"

# STEP 1b: Read and pivot data from USA Facts; extract population data file as pop_usafacts
rawUSAFacts_20201003 <- readPivotUSAFacts(popFile=popFile, 
                                          caseFile=caseFile_20201003, 
                                          deathFile=deathFile_20201003, 
                                          unassignedDate=maxDate_20201003
                                          )
## Parsed with column specification:
## cols(
##   countyFIPS = col_double(),
##   `County Name` = col_character(),
##   State = col_character(),
##   population = col_double()
## )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Joining, by = c("state", "date", "unassigned")

pop_usafacts <- rawUSAFacts_20201003$pop

# STEP 2: Read case and death data (redundant), combine, and add population totals
# Add previous clusters by county rather than by state (function readUSAFacts updated)
burden_20201003 <- readUSAFacts(
    caseFile=caseFile_20201003, 
    deathFile=deathFile_20201003, 
    oldFile=burden_20200903_new,
    showBurdenMinPop=10000,
    maxDate=maxDate_20201003,
    stateClusters=NULL, 
    countyClusters=clustVec_county_20200903_new
    )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 258
## $ countyFIPS    <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State         <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20`     <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20`     <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20`     <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20`     <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20`     <dbl> 0, 1, 4, 0, 0, 0, 0, 0, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20`     <dbl> 0, 4, 4, 0, 0, 1, 0, 1, 2, 10, 1, 1, 0, 0, 1, 1, 0, 1...
## $ `3/26/20`     <dbl> 0, 6, 5, 0, 0, 2, 2, 1, 2, 13, 1, 4, 1, 0, 1, 1, 0, 1...
## $ `3/27/20`     <dbl> 0, 6, 5, 0, 0, 5, 2, 1, 3, 15, 1, 7, 1, 0, 1, 3, 0, 1...
## $ `3/28/20`     <dbl> 0, 6, 10, 0, 0, 5, 2, 1, 3, 17, 1, 7, 1, 0, 2, 4, 0, ...
## $ `3/29/20`     <dbl> 0, 6, 15, 0, 0, 5, 2, 1, 3, 27, 2, 8, 1, 0, 2, 5, 0, ...
## $ `3/30/20`     <dbl> 0, 7, 18, 0, 2, 5, 2, 1, 9, 36, 2, 10, 2, 0, 2, 5, 0,...
## $ `3/31/20`     <dbl> 0, 7, 19, 0, 3, 5, 2, 1, 9, 36, 2, 11, 3, 0, 2, 5, 0,...
## $ `4/1/20`      <dbl> 0, 10, 23, 0, 3, 5, 2, 1, 11, 45, 2, 13, 4, 2, 3, 6, ...
## $ `4/2/20`      <dbl> 0, 10, 25, 0, 4, 6, 2, 1, 12, 67, 4, 14, 4, 2, 7, 6, ...
## $ `4/3/20`      <dbl> 0, 12, 28, 1, 4, 9, 2, 1, 20, 81, 5, 15, 4, 3, 8, 7, ...
## $ `4/4/20`      <dbl> 0, 12, 29, 2, 4, 10, 2, 1, 21, 87, 6, 15, 4, 7, 9, 7,...
## $ `4/5/20`      <dbl> 0, 12, 34, 2, 7, 10, 2, 1, 24, 90, 6, 18, 5, 9, 9, 7,...
## $ `4/6/20`      <dbl> 0, 12, 38, 3, 7, 10, 2, 1, 38, 96, 6, 20, 6, 9, 9, 9,...
## $ `4/7/20`      <dbl> 0, 12, 42, 3, 8, 10, 2, 2, 48, 102, 6, 20, 6, 10, 9, ...
## $ `4/8/20`      <dbl> 0, 12, 49, 3, 9, 10, 3, 3, 52, 140, 7, 22, 6, 10, 11,...
## $ `4/9/20`      <dbl> 0, 17, 59, 7, 11, 11, 4, 3, 54, 161, 7, 25, 6, 13, 11...
## $ `4/10/20`     <dbl> 0, 17, 59, 9, 11, 12, 4, 3, 54, 171, 7, 27, 7, 13, 11...
## $ `4/11/20`     <dbl> 0, 19, 66, 10, 13, 12, 4, 6, 57, 184, 7, 30, 9, 15, 1...
## $ `4/12/20`     <dbl> 0, 19, 71, 10, 16, 13, 4, 7, 60, 200, 9, 30, 10, 19, ...
## $ `4/13/20`     <dbl> 0, 19, 78, 10, 17, 15, 6, 8, 61, 212, 9, 33, 10, 19, ...
## $ `4/14/20`     <dbl> 0, 23, 87, 11, 17, 16, 8, 8, 62, 216, 9, 33, 12, 21, ...
## $ `4/15/20`     <dbl> 0, 25, 98, 13, 19, 17, 8, 11, 62, 227, 10, 37, 13, 22...
## $ `4/16/20`     <dbl> 0, 25, 102, 14, 23, 18, 8, 11, 63, 234, 11, 37, 13, 2...
## $ `4/17/20`     <dbl> 0, 25, 103, 15, 23, 20, 8, 13, 63, 236, 12, 37, 13, 2...
## $ `4/18/20`     <dbl> 0, 25, 109, 18, 26, 20, 9, 13, 66, 240, 12, 39, 14, 2...
## $ `4/19/20`     <dbl> 0, 27, 114, 20, 28, 21, 9, 14, 72, 246, 12, 42, 14, 2...
## $ `4/20/20`     <dbl> 0, 28, 117, 22, 32, 22, 11, 14, 80, 257, 12, 43, 17, ...
## $ `4/21/20`     <dbl> 0, 30, 123, 28, 32, 26, 11, 15, 83, 259, 12, 44, 18, ...
## $ `4/22/20`     <dbl> 0, 32, 132, 29, 33, 29, 11, 17, 85, 270, 12, 46, 21, ...
## $ `4/23/20`     <dbl> 0, 33, 143, 30, 33, 31, 12, 19, 88, 275, 12, 47, 22, ...
## $ `4/24/20`     <dbl> 0, 36, 147, 32, 34, 31, 12, 21, 89, 282, 12, 49, 25, ...
## $ `4/25/20`     <dbl> 0, 37, 154, 33, 35, 31, 12, 28, 90, 284, 12, 49, 27, ...
## $ `4/26/20`     <dbl> 0, 37, 161, 33, 38, 34, 12, 32, 90, 285, 14, 51, 32, ...
## $ `4/27/20`     <dbl> 0, 39, 168, 35, 42, 34, 12, 34, 90, 289, 14, 51, 39, ...
## $ `4/28/20`     <dbl> 0, 40, 171, 37, 42, 34, 12, 45, 92, 290, 15, 52, 39, ...
## $ `4/29/20`     <dbl> 0, 42, 173, 37, 42, 36, 12, 51, 93, 290, 15, 52, 39, ...
## $ `4/30/20`     <dbl> 0, 42, 174, 39, 42, 37, 13, 53, 93, 290, 15, 52, 43, ...
## $ `5/1/20`      <dbl> 0, 42, 175, 42, 42, 39, 14, 65, 93, 290, 15, 52, 49, ...
## $ `5/2/20`      <dbl> 0, 45, 181, 43, 42, 40, 14, 92, 98, 294, 15, 54, 49, ...
## $ `5/3/20`      <dbl> 0, 48, 187, 45, 42, 40, 14, 105, 105, 300, 16, 57, 49...
## $ `5/4/20`      <dbl> 0, 53, 188, 45, 42, 40, 16, 114, 105, 302, 16, 58, 51...
## $ `5/5/20`      <dbl> 0, 53, 189, 47, 43, 40, 18, 120, 114, 304, 17, 60, 54...
## $ `5/6/20`      <dbl> 0, 58, 196, 47, 43, 42, 18, 130, 114, 306, 18, 61, 54...
## $ `5/7/20`      <dbl> 0, 61, 205, 51, 44, 44, 18, 155, 120, 308, 18, 63, 56...
## $ `5/8/20`      <dbl> 0, 67, 208, 53, 44, 44, 21, 162, 123, 311, 21, 63, 59...
## $ `5/9/20`      <dbl> 0, 68, 216, 58, 45, 44, 22, 178, 124, 314, 22, 64, 61...
## $ `5/10/20`     <dbl> 0, 74, 222, 59, 46, 44, 23, 189, 124, 316, 22, 65, 66...
## $ `5/11/20`     <dbl> 0, 84, 224, 61, 46, 45, 26, 196, 125, 319, 24, 67, 67...
## $ `5/12/20`     <dbl> 0, 91, 227, 67, 46, 45, 26, 224, 126, 324, 24, 69, 69...
## $ `5/13/20`     <dbl> 0, 93, 231, 69, 46, 45, 28, 230, 127, 324, 24, 73, 72...
## $ `5/14/20`     <dbl> 0, 103, 243, 74, 46, 45, 28, 249, 128, 326, 25, 74, 7...
## $ `5/15/20`     <dbl> 0, 103, 244, 79, 49, 45, 32, 258, 129, 326, 26, 75, 8...
## $ `5/16/20`     <dbl> 0, 110, 254, 79, 50, 45, 35, 271, 130, 328, 27, 77, 8...
## $ `5/17/20`     <dbl> 0, 110, 254, 81, 50, 46, 35, 272, 130, 328, 27, 77, 8...
## $ `5/18/20`     <dbl> 0, 120, 260, 85, 50, 47, 40, 285, 133, 329, 28, 79, 8...
## $ `5/19/20`     <dbl> 0, 127, 262, 90, 51, 47, 52, 295, 133, 329, 29, 80, 9...
## $ `5/20/20`     <dbl> 0, 136, 270, 96, 52, 47, 64, 312, 136, 330, 30, 83, 1...
## $ `5/21/20`     <dbl> 0, 147, 270, 100, 52, 48, 71, 321, 136, 330, 31, 84, ...
## $ `5/22/20`     <dbl> 0, 149, 271, 104, 55, 49, 89, 329, 137, 330, 33, 85, ...
## $ `5/23/20`     <dbl> 0, 155, 273, 105, 58, 49, 105, 335, 138, 330, 33, 86,...
## $ `5/24/20`     <dbl> 0, 159, 274, 110, 59, 49, 111, 344, 141, 336, 33, 87,...
## $ `5/25/20`     <dbl> 0, 173, 276, 116, 62, 49, 141, 368, 147, 337, 33, 87,...
## $ `5/26/20`     <dbl> 0, 189, 277, 122, 66, 51, 167, 380, 150, 338, 33, 90,...
## $ `5/27/20`     <dbl> 0, 192, 281, 130, 71, 53, 176, 391, 152, 340, 33, 93,...
## $ `5/28/20`     <dbl> 0, 205, 281, 132, 71, 58, 185, 392, 152, 349, 34, 97,...
## $ `5/29/20`     <dbl> 0, 212, 282, 147, 71, 60, 201, 396, 153, 352, 36, 99,...
## $ `5/30/20`     <dbl> 0, 216, 283, 150, 72, 61, 203, 402, 154, 353, 37, 100...
## $ `5/31/20`     <dbl> 0, 220, 288, 164, 75, 62, 209, 410, 157, 355, 37, 100...
## $ `6/1/20`      <dbl> 0, 233, 292, 172, 76, 63, 209, 414, 164, 358, 38, 103...
## $ `6/2/20`      <dbl> 0, 238, 292, 175, 76, 63, 212, 416, 165, 358, 38, 104...
## $ `6/3/20`      <dbl> 0, 239, 292, 177, 76, 63, 215, 419, 165, 359, 38, 105...
## $ `6/4/20`      <dbl> 0, 241, 293, 177, 76, 63, 217, 421, 167, 360, 38, 107...
## $ `6/5/20`      <dbl> 0, 248, 296, 183, 76, 64, 219, 431, 169, 363, 38, 108...
## $ `6/6/20`      <dbl> 0, 259, 304, 190, 77, 70, 225, 442, 174, 373, 40, 108...
## $ `6/7/20`      <dbl> 0, 265, 313, 193, 77, 72, 232, 449, 176, 378, 42, 110...
## $ `6/8/20`      <dbl> 0, 272, 320, 197, 79, 73, 238, 455, 178, 383, 42, 111...
## $ `6/9/20`      <dbl> 0, 282, 325, 199, 85, 75, 243, 464, 180, 391, 42, 117...
## $ `6/10/20`     <dbl> 0, 295, 331, 208, 89, 79, 248, 471, 182, 401, 42, 118...
## $ `6/11/20`     <dbl> 0, 312, 343, 214, 93, 87, 253, 484, 184, 417, 42, 121...
## $ `6/12/20`     <dbl> 0, 323, 353, 221, 97, 95, 258, 499, 188, 427, 46, 122...
## $ `6/13/20`     <dbl> 0, 331, 361, 226, 100, 102, 276, 517, 190, 438, 47, 1...
## $ `6/14/20`     <dbl> 0, 357, 364, 234, 104, 110, 302, 536, 195, 453, 51, 1...
## $ `6/15/20`     <dbl> 0, 368, 383, 238, 111, 116, 307, 544, 204, 475, 53, 1...
## $ `6/16/20`     <dbl> 0, 373, 389, 245, 116, 121, 310, 551, 206, 485, 53, 1...
## $ `6/17/20`     <dbl> 0, 375, 392, 251, 118, 123, 313, 554, 208, 486, 53, 1...
## $ `6/18/20`     <dbl> 0, 400, 401, 263, 121, 130, 320, 566, 210, 501, 55, 1...
## $ `6/19/20`     <dbl> 0, 411, 413, 266, 126, 139, 320, 569, 210, 507, 58, 1...
## $ `6/20/20`     <dbl> 0, 431, 420, 272, 126, 143, 327, 572, 211, 516, 58, 1...
## $ `6/21/20`     <dbl> 0, 434, 430, 272, 127, 149, 327, 576, 213, 521, 58, 1...
## $ `6/22/20`     <dbl> 0, 442, 437, 277, 129, 153, 328, 578, 215, 528, 58, 1...
## $ `6/23/20`     <dbl> 0, 453, 450, 280, 135, 159, 329, 581, 216, 534, 58, 1...
## $ `6/24/20`     <dbl> 0, 469, 464, 288, 141, 168, 336, 584, 220, 543, 58, 1...
## $ `6/25/20`     <dbl> 0, 479, 477, 305, 149, 176, 351, 588, 233, 549, 64, 1...
## $ `6/26/20`     <dbl> 0, 488, 515, 312, 153, 184, 351, 594, 236, 559, 68, 1...
## $ `6/27/20`     <dbl> 0, 498, 555, 317, 161, 188, 358, 600, 245, 561, 69, 2...
## $ `6/28/20`     <dbl> 0, 503, 575, 317, 162, 189, 358, 602, 245, 561, 70, 2...
## $ `6/29/20`     <dbl> 0, 527, 643, 322, 165, 199, 365, 605, 269, 585, 73, 2...
## $ `6/30/20`     <dbl> 0, 537, 680, 325, 170, 208, 365, 607, 276, 590, 74, 2...
## $ `7/1/20`      <dbl> 0, 553, 703, 326, 174, 218, 367, 607, 278, 595, 77, 2...
## $ `7/2/20`      <dbl> 0, 561, 751, 335, 179, 222, 369, 610, 288, 611, 82, 2...
## $ `7/3/20`      <dbl> 0, 568, 845, 348, 189, 230, 372, 625, 330, 625, 88, 2...
## $ `7/4/20`      <dbl> 0, 591, 863, 350, 190, 234, 373, 626, 340, 637, 88, 2...
## $ `7/5/20`      <dbl> 0, 615, 881, 352, 193, 239, 373, 634, 362, 642, 100, ...
## $ `7/6/20`      <dbl> 0, 618, 911, 356, 197, 247, 373, 634, 384, 655, 105, ...
## $ `7/7/20`      <dbl> 0, 644, 997, 360, 199, 255, 373, 634, 395, 656, 106, ...
## $ `7/8/20`      <dbl> 0, 651, 1056, 366, 201, 262, 374, 639, 411, 660, 114,...
## $ `7/9/20`      <dbl> 0, 661, 1131, 371, 211, 282, 375, 646, 445, 672, 115,...
## $ `7/10/20`     <dbl> 0, 670, 1187, 381, 218, 292, 381, 648, 465, 679, 118,...
## $ `7/11/20`     <dbl> 0, 684, 1224, 398, 224, 307, 382, 654, 500, 690, 128,...
## $ `7/12/20`     <dbl> 0, 706, 1294, 403, 228, 331, 383, 655, 526, 693, 129,...
## $ `7/13/20`     <dbl> 0, 728, 1359, 413, 231, 350, 383, 660, 566, 702, 136,...
## $ `7/14/20`     <dbl> 0, 746, 1414, 428, 236, 366, 385, 661, 589, 712, 140,...
## $ `7/15/20`     <dbl> 0, 756, 1518, 441, 242, 389, 386, 664, 655, 718, 145,...
## $ `7/16/20`     <dbl> 0, 780, 1599, 459, 247, 424, 389, 669, 675, 731, 152,...
## $ `7/17/20`     <dbl> 0, 789, 1689, 463, 255, 440, 393, 672, 720, 742, 157,...
## $ `7/18/20`     <dbl> 0, 827, 1819, 483, 264, 458, 397, 678, 741, 756, 165,...
## $ `7/19/20`     <dbl> 0, 842, 1937, 495, 269, 482, 398, 686, 785, 762, 173,...
## $ `7/20/20`     <dbl> 0, 857, 2013, 503, 279, 507, 400, 689, 832, 767, 179,...
## $ `7/21/20`     <dbl> 0, 865, 2102, 514, 283, 524, 401, 695, 869, 774, 182,...
## $ `7/22/20`     <dbl> 0, 886, 2196, 518, 287, 547, 407, 701, 891, 782, 184,...
## $ `7/23/20`     <dbl> 0, 905, 2461, 534, 289, 585, 408, 706, 934, 789, 193,...
## $ `7/24/20`     <dbl> 0, 921, 2513, 539, 303, 615, 411, 711, 999, 797, 205,...
## $ `7/25/20`     <dbl> 0, 932, 2662, 552, 318, 637, 414, 720, 1062, 810, 207...
## $ `7/26/20`     <dbl> 0, 942, 2708, 562, 324, 646, 415, 724, 1113, 821, 209...
## $ `7/27/20`     <dbl> 0, 965, 2770, 569, 334, 669, 416, 730, 1194, 825, 220...
## $ `7/28/20`     <dbl> 0, 974, 2835, 575, 337, 675, 429, 734, 1243, 836, 221...
## $ `7/29/20`     <dbl> 0, 974, 2835, 575, 338, 675, 429, 734, 1244, 836, 221...
## $ `7/30/20`     <dbl> 0, 1002, 3028, 585, 352, 731, 435, 747, 1336, 848, 23...
## $ `7/31/20`     <dbl> 0, 1015, 3101, 598, 363, 767, 437, 753, 1450, 859, 23...
## $ `8/1/20`      <dbl> 0, 1030, 3142, 602, 368, 792, 443, 757, 1480, 861, 24...
## $ `8/2/20`      <dbl> 0, 1052, 3223, 610, 372, 813, 445, 765, 1580, 868, 25...
## $ `8/3/20`      <dbl> 0, 1066, 3265, 612, 382, 830, 446, 766, 1612, 875, 26...
## $ `8/4/20`      <dbl> 0, 1073, 3320, 614, 389, 836, 449, 766, 1646, 882, 26...
## $ `8/5/20`      <dbl> 0, 1073, 3380, 615, 392, 839, 452, 769, 1683, 886, 27...
## $ `8/6/20`      <dbl> 0, 1096, 3438, 619, 421, 874, 458, 771, 1741, 893, 28...
## $ `8/7/20`      <dbl> 0, 1113, 3504, 624, 424, 909, 462, 774, 1777, 899, 29...
## $ `8/8/20`      <dbl> 0, 1134, 3564, 628, 434, 923, 471, 773, 1836, 904, 29...
## $ `8/9/20`      <dbl> 0, 1215, 3606, 630, 446, 934, 472, 779, 1860, 906, 30...
## $ `8/10/20`     <dbl> 0, 1215, 3714, 631, 450, 947, 474, 782, 1883, 909, 30...
## $ `8/11/20`     <dbl> 0, 1215, 3736, 643, 455, 958, 489, 785, 1914, 916, 30...
## $ `8/12/20`     <dbl> 0, 1241, 3776, 646, 464, 967, 500, 788, 1935, 918, 31...
## $ `8/13/20`     <dbl> 0, 1250, 3813, 651, 469, 977, 501, 790, 1959, 919, 32...
## $ `8/14/20`     <dbl> 0, 1252, 3860, 656, 477, 989, 502, 796, 1975, 922, 32...
## $ `8/15/20`     <dbl> 0, 1262, 3909, 663, 483, 996, 503, 807, 2019, 925, 33...
## $ `8/16/20`     <dbl> 0, 1273, 3948, 671, 483, 1005, 504, 811, 2037, 927, 3...
## $ `8/17/20`     <dbl> 0, 1274, 3960, 672, 488, 1008, 504, 814, 2055, 928, 3...
## $ `8/18/20`     <dbl> 0, 1291, 3977, 674, 490, 1034, 512, 814, 2107, 937, 3...
## $ `8/19/20`     <dbl> 0, 1293, 4002, 683, 503, 1049, 530, 814, 2159, 941, 3...
## $ `8/20/20`     <dbl> 0, 1293, 4035, 690, 507, 1077, 534, 814, 2214, 949, 3...
## $ `8/21/20`     <dbl> 0, 1293, 4054, 690, 509, 1083, 534, 814, 2228, 952, 3...
## $ `8/22/20`     <dbl> 0, 1322, 4115, 699, 516, 1096, 536, 822, 2276, 957, 3...
## $ `8/23/20`     <dbl> 0, 1324, 4147, 702, 523, 1099, 536, 824, 2286, 958, 3...
## $ `8/24/20`     <dbl> 0, 1351, 4167, 720, 526, 1135, 536, 825, 2327, 971, 3...
## $ `8/25/20`     <dbl> 0, 1355, 4190, 724, 527, 1160, 536, 826, 2345, 973, 3...
## $ `8/26/20`     <dbl> 0, 1366, 4265, 732, 530, 1195, 537, 833, 2400, 983, 3...
## $ `8/27/20`     <dbl> 0, 1377, 4311, 739, 533, 1213, 538, 839, 2413, 1011, ...
## $ `8/28/20`     <dbl> 0, 1389, 4347, 745, 535, 1219, 541, 840, 2443, 1017, ...
## $ `8/29/20`     <dbl> 0, 1400, 4424, 753, 540, 1248, 546, 855, 2499, 1024, ...
## $ `8/30/20`     <dbl> 0, 1438, 4525, 757, 550, 1277, 550, 864, 2533, 1027, ...
## $ `8/31/20`     <dbl> 0, 1442, 4545, 757, 554, 1287, 551, 866, 2567, 1033, ...
## $ `9/1/20`      <dbl> 0, 1452, 4568, 764, 558, 1303, 559, 871, 2619, 1041, ...
## $ `9/2/20`      <dbl> 0, 1452, 4583, 768, 562, 1308, 561, 872, 2633, 1045, ...
## $ `9/3/20`      <dbl> 0, 1466, 4628, 771, 564, 1336, 563, 874, 2678, 1046, ...
## $ `9/4/20`      <dbl> 0, 1475, 4654, 776, 570, 1361, 563, 881, 2747, 1054, ...
## $ `9/5/20`      <dbl> 0, 1492, 4686, 776, 576, 1376, 566, 886, 2830, 1059, ...
## $ `9/6/20`      <dbl> 0, 1498, 4713, 777, 581, 1379, 568, 890, 2842, 1061, ...
## $ `9/7/20`      <dbl> 0, 1504, 4730, 778, 583, 1384, 568, 892, 2877, 1063, ...
## $ `9/8/20`      <dbl> 0, 1508, 4757, 778, 589, 1390, 568, 892, 2891, 1064, ...
## $ `9/9/20`      <dbl> 0, 1522, 4787, 778, 591, 1401, 570, 895, 2907, 1068, ...
## $ `9/10/20`     <dbl> 0, 1544, 4833, 785, 594, 1430, 572, 896, 2958, 1076, ...
## $ `9/11/20`     <dbl> 0, 1551, 4886, 786, 602, 1441, 573, 896, 2988, 1088, ...
## $ `9/12/20`     <dbl> 0, 1565, 4922, 792, 604, 1446, 574, 898, 3047, 1094, ...
## $ `9/13/20`     <dbl> 0, 1576, 4959, 794, 607, 1453, 580, 899, 3093, 1094, ...
## $ `9/14/20`     <dbl> 0, 1585, 4978, 801, 610, 1464, 580, 900, 3110, 1097, ...
## $ `9/15/20`     <dbl> 0, 1601, 4992, 806, 611, 1475, 581, 901, 3127, 1102, ...
## $ `9/16/20`     <dbl> 0, 1619, 5003, 809, 612, 1487, 583, 901, 3165, 1106, ...
## $ `9/17/20`     <dbl> 0, 1624, 5021, 809, 617, 1504, 585, 902, 3211, 1106, ...
## $ `9/18/20`     <dbl> 0, 1664, 5033, 824, 619, 1527, 585, 906, 3249, 1117, ...
## $ `9/19/20`     <dbl> 0, 1673, 5047, 830, 628, 1542, 585, 908, 3320, 1123, ...
## $ `9/20/20`     <dbl> 0, 1690, 5061, 835, 632, 1551, 587, 909, 3338, 1130, ...
## $ `9/21/20`     <dbl> 0, 1691, 5087, 838, 635, 1560, 591, 911, 3374, 1132, ...
## $ `9/22/20`     <dbl> 0, 1714, 5124, 848, 635, 1573, 593, 911, 3390, 1140, ...
## $ `9/23/20`     <dbl> 0, 1715, 5141, 851, 638, 1580, 597, 911, 3401, 1144, ...
## $ `9/24/20`     <dbl> 1074, 1715, 5141, 851, 638, 1580, 597, 911, 3401, 114...
## $ `9/25/20`     <dbl> 0, 1757, 5456, 873, 652, 1608, 599, 912, 3499, 1161, ...
## $ `9/26/20`     <dbl> 0, 1764, 5477, 882, 654, 1611, 604, 912, 3515, 1164, ...
## $ `9/27/20`     <dbl> 0, 1773, 5526, 885, 656, 1617, 606, 913, 3534, 1168, ...
## $ `9/28/20`     <dbl> 0, 1785, 5588, 886, 657, 1618, 607, 914, 3548, 1172, ...
## $ `9/29/20`     <dbl> 0, 1787, 5606, 886, 658, 1621, 607, 917, 3556, 1175, ...
## $ `9/30/20`     <dbl> 0, 1791, 5640, 896, 664, 1629, 610, 917, 3569, 1179, ...
## $ `10/1/20`     <dbl> 0, 1798, 5997, 898, 672, 1634, 612, 919, 3587, 1181, ...
## Observations: 811,530
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumCases   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 258
## $ countyFIPS    <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State         <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/8/20`      <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/9/20`      <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/10/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/11/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/12/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/13/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/14/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/15/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/16/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/17/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/18/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/19/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/20/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/21/20`     <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 13, 0, 0, 0, 1, 0, 0, 0, 1...
## $ `4/22/20`     <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 16, 0, 0, 0, 1, 0, 1, 0, 1...
## $ `4/23/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 16, 0, 1, 0, 1, 1, 1, 0, 1...
## $ `4/24/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 17, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/25/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/26/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/27/20`     <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/28/20`     <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 19, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/29/20`     <dbl> 0, 3, 2, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/30/20`     <dbl> 0, 3, 3, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/1/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/2/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/3/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/4/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/5/20`      <dbl> 0, 3, 5, 1, 0, 0, 0, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/6/20`      <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/7/20`      <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/8/20`      <dbl> 0, 3, 5, 1, 1, 0, 1, 3, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/9/20`      <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/10/20`     <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/11/20`     <dbl> 0, 3, 6, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/12/20`     <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 21, 0, 1, 1, 1, 1, 1, 1, 2...
## $ `5/13/20`     <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 22, 0, 1, 2, 1, 1, 1, 1, 2...
## $ `5/14/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 8, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/15/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/16/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/17/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/18/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/19/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/20/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/21/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/22/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/23/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/24/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 12, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/25/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 12, 3, 24, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/26/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/27/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/28/20`     <dbl> 0, 3, 9, 1, 1, 1, 4, 15, 3, 24, 2, 1, 8, 2, 2, 1, 1, ...
## $ `5/29/20`     <dbl> 0, 3, 9, 1, 1, 1, 4, 16, 3, 24, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/30/20`     <dbl> 0, 4, 9, 1, 1, 1, 4, 17, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/31/20`     <dbl> 0, 4, 9, 1, 1, 1, 5, 18, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `6/1/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 25, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/2/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/3/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/4/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/5/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 21, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/6/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/7/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/8/20`      <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/9/20`      <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/10/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/11/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/12/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/13/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/14/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/15/20`     <dbl> 0, 6, 9, 1, 1, 1, 9, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/16/20`     <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/17/20`     <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/18/20`     <dbl> 0, 8, 9, 1, 1, 1, 9, 25, 4, 26, 5, 3, 11, 4, 2, 1, 1,...
## $ `6/19/20`     <dbl> 0, 8, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 11, 4, 2, 1, 1...
## $ `6/20/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/21/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/22/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/23/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 4, 2, 1, 1...
## $ `6/24/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/25/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/26/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/27/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/28/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/29/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/30/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/1/20`      <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/2/20`      <dbl> 0, 13, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/3/20`      <dbl> 0, 13, 10, 2, 1, 1, 10, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/4/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/5/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/6/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/7/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/8/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/9/20`      <dbl> 0, 14, 11, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/10/20`     <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/11/20`     <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/12/20`     <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/13/20`     <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/14/20`     <dbl> 0, 18, 12, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/15/20`     <dbl> 0, 19, 13, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/16/20`     <dbl> 0, 20, 14, 3, 2, 1, 11, 32, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/17/20`     <dbl> 0, 21, 14, 3, 2, 1, 11, 33, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/18/20`     <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/19/20`     <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/20/20`     <dbl> 0, 21, 15, 4, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/21/20`     <dbl> 0, 21, 16, 4, 2, 1, 11, 34, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/22/20`     <dbl> 0, 21, 16, 4, 2, 1, 12, 34, 6, 34, 7, 5, 12, 8, 2, 1,...
## $ `7/23/20`     <dbl> 0, 21, 17, 4, 2, 1, 12, 35, 6, 34, 7, 5, 12, 9, 2, 1,...
## $ `7/24/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/25/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/26/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/27/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/28/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/29/20`     <dbl> 0, 21, 21, 4, 2, 3, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/30/20`     <dbl> 0, 21, 21, 5, 2, 3, 12, 36, 8, 38, 8, 6, 12, 9, 5, 1,...
## $ `7/31/20`     <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/1/20`      <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/2/20`      <dbl> 0, 21, 23, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/3/20`      <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/4/20`      <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 12, 38, 8, 7, 12, 9, 5, 1...
## $ `8/5/20`      <dbl> 0, 22, 24, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/6/20`      <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/7/20`      <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/8/20`      <dbl> 0, 22, 26, 5, 5, 4, 12, 37, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/9/20`      <dbl> 0, 22, 27, 5, 5, 4, 12, 37, 14, 38, 8, 8, 12, 9, 5, 1...
## $ `8/10/20`     <dbl> 0, 22, 28, 5, 5, 4, 12, 37, 17, 38, 9, 9, 12, 10, 5, ...
## $ `8/11/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 19, 38, 9, 12, 12, 10, 5,...
## $ `8/12/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/13/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/14/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/15/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/16/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/17/20`     <dbl> 0, 23, 32, 6, 6, 5, 14, 37, 23, 38, 9, 12, 12, 10, 5,...
## $ `8/18/20`     <dbl> 0, 23, 33, 6, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/19/20`     <dbl> 0, 23, 33, 7, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/20/20`     <dbl> 0, 23, 34, 7, 6, 5, 14, 37, 25, 38, 10, 12, 12, 10, 5...
## $ `8/21/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 5...
## $ `8/22/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/23/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/24/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 27, 38, 10, 13, 12, 11, 6...
## $ `8/25/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 28, 39, 10, 13, 12, 11, 6...
## $ `8/26/20`     <dbl> 0, 23, 36, 7, 6, 7, 14, 37, 28, 39, 10, 13, 12, 12, 6...
## $ `8/27/20`     <dbl> 0, 23, 37, 7, 6, 7, 14, 37, 30, 39, 12, 13, 12, 13, 6...
## $ `8/28/20`     <dbl> 0, 23, 39, 7, 6, 9, 14, 37, 32, 40, 12, 13, 12, 13, 6...
## $ `8/29/20`     <dbl> 0, 23, 40, 7, 7, 9, 14, 37, 35, 40, 12, 13, 12, 14, 6...
## $ `8/30/20`     <dbl> 0, 23, 40, 7, 7, 10, 14, 37, 35, 40, 12, 13, 12, 14, ...
## $ `8/31/20`     <dbl> 0, 23, 42, 7, 8, 11, 14, 37, 36, 40, 12, 13, 12, 14, ...
## $ `9/1/20`      <dbl> 0, 24, 42, 7, 8, 11, 14, 37, 38, 40, 12, 13, 12, 14, ...
## $ `9/2/20`      <dbl> 0, 24, 42, 7, 8, 11, 14, 37, 38, 40, 12, 14, 12, 14, ...
## $ `9/3/20`      <dbl> 0, 24, 44, 7, 8, 11, 14, 37, 38, 40, 12, 14, 12, 14, ...
## $ `9/4/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/5/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/6/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/7/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/8/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/9/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/10/20`     <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 13, 16, 12, 14, ...
## $ `9/11/20`     <dbl> 0, 24, 47, 7, 9, 12, 14, 38, 38, 41, 14, 17, 12, 15, ...
## $ `9/12/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 14, 18, 12, 16, ...
## $ `9/13/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 14, 18, 12, 16, ...
## $ `9/14/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 14, 18, 12, 16, ...
## $ `9/15/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 15, 24, 12, 16, ...
## $ `9/16/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 15, 24, 12, 16, ...
## $ `9/17/20`     <dbl> 0, 24, 48, 7, 9, 13, 14, 38, 39, 42, 15, 25, 12, 17, ...
## $ `9/18/20`     <dbl> 0, 24, 48, 7, 10, 14, 14, 38, 39, 42, 15, 27, 12, 17,...
## $ `9/19/20`     <dbl> 0, 24, 49, 7, 10, 14, 14, 38, 39, 42, 15, 27, 12, 17,...
## $ `9/20/20`     <dbl> 0, 24, 49, 7, 10, 14, 14, 38, 39, 42, 15, 27, 12, 17,...
## $ `9/21/20`     <dbl> 0, 24, 49, 7, 10, 14, 14, 38, 39, 42, 15, 27, 12, 17,...
## $ `9/22/20`     <dbl> 0, 25, 49, 7, 10, 14, 14, 38, 40, 42, 15, 28, 12, 17,...
## $ `9/23/20`     <dbl> 0, 25, 49, 7, 10, 15, 14, 39, 40, 42, 15, 30, 12, 17,...
## $ `9/24/20`     <dbl> 0, 25, 50, 7, 10, 15, 14, 39, 42, 42, 13, 30, 12, 16,...
## $ `9/25/20`     <dbl> 0, 25, 50, 7, 10, 15, 14, 39, 44, 42, 13, 30, 12, 16,...
## $ `9/26/20`     <dbl> 0, 25, 50, 7, 10, 15, 14, 39, 44, 42, 13, 30, 12, 16,...
## $ `9/27/20`     <dbl> 0, 25, 50, 7, 10, 15, 14, 39, 44, 42, 13, 30, 12, 16,...
## $ `9/28/20`     <dbl> 0, 25, 50, 7, 10, 15, 14, 39, 44, 42, 13, 30, 12, 16,...
## $ `9/29/20`     <dbl> 0, 27, 50, 7, 10, 15, 14, 40, 44, 42, 13, 30, 12, 16,...
## $ `9/30/20`     <dbl> 0, 27, 52, 7, 10, 15, 15, 40, 44, 42, 13, 30, 12, 16,...
## $ `10/1/20`     <dbl> 0, 28, 53, 7, 10, 15, 16, 40, 44, 42, 13, 30, 12, 16,...
## Observations: 811,530
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumDeaths  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...

# STEP 3: Create appropriately filtered data without creating new clusters
clust_20201003 <- prepClusterCounties(burdenFile=burden_20201003, 
                                      maxDate=maxDate_20201003, 
                                      minPop=25000,
                                      createClusters=FALSE
                                      )
## # A tibble: 1 x 6
##   cpm_mean_is0 dpm_mean_is0 cpm_mean_lt100 dpm_mean_lt100 cpm_mean_lt5000
##          <dbl>        <dbl>          <dbl>          <dbl>           <dbl>
## 1            0       0.0214              0          0.179          0.0761
## # ... with 1 more variable: dpm_mean_lt5000 <dbl>
# STEP 4: Assess the existing clusters against the new data, including the cumulative plot
helperACC_county_20201003 <- helperAssessCountyClusters(vecCluster=clustVec_county_20200903_new, 
                                                        dfPop=clust_20201003$countyFiltered, 
                                                        dfBurden=clust_20201003$countyFiltered, 
                                                        showCum=TRUE,
                                                        thruLabel="Sep 30, 2020", 
                                                        plotsTogether=TRUE, 
                                                        orderCluster=TRUE
                                                        )

## 
## Recency is defined as 2020-09-01 through current
## 
## Recency is defined as 2020-09-01 through current
## Warning: Removed 1 rows containing missing values (geom_point).

# STEP 5: Add back clusters not used for analysis (code 999) and associated disease data
clusterStateData_20201003 <- helperMakeClusterStateData(helperACC_county_20201003, 
                                                        dfBurden=clust_20201003$countyDailyPerCapita,
                                                        orderCluster=TRUE
                                                        )
## Joining, by = "fipsCounty"
## Joining, by = "fipsCounty"
# STEP 6: Run an example state-level summary using several of the largest states
stateCountySummary(states=c("CA", "TX", "FL", "NY", "IL", "PA"),
                   df=changeOrderLabel(clusterStateData_20201003, grpVars="fipsCounty"),
                   keyDate=maxDate_20201003,
                   showQuadrants=TRUE, 
                   showCumulative=TRUE, 
                   facetCumulativeByState = TRUE, 
                   showAllFactorLevels = TRUE
                   )

# STEP 7: Assess lags for largest counties and counties with highest death rates
# STEP 7a: Save data for top-100 counties by death rate (filtered to only counties of 100k+ population)
top100_dpm_20201003 <- exploreTopCounties(helperACC_county_20201003, 
                                          minDate="2020-03-01", 
                                          maxDate="2020-09-30", 
                                          minPop=100000,
                                          nVar="dpm", 
                                          nKey=100, 
                                          plotData=FALSE
                                          )
## 
## *** KEY COUNTIES INCLUDE: ***
## # A tibble: 100 x 5
##    state cluster countyName          dpm     pop
##    <chr> <fct>   <chr>             <dbl>   <dbl>
##  1 36005 3       3 - Bronx (NY)    3492. 1418207
##  2 36081 3       3 - Queens (NY)   3218. 2253858
##  3 36047 3       3 - Kings (NY)    2863. 2559903
##  4 34013 3       3 - Essex (NJ)    2663.  798975
##  5 34031 3       3 - Passaic (NJ)  2499.  501826
##  6 34039 3       3 - Union (NJ)    2437.  556341
##  7 36085 3       3 - Richmond (NY) 2281.  476143
##  8 34017 3       3 - Hudson (NJ)   2256.  672391
##  9 34003 3       3 - Bergen (NJ)   2194.  932202
## 10 48061 3       3 - Cameron (TX)  2155.  423163
## # ... with 90 more rows
## 
## The best lags are:
## # A tibble: 100 x 5
##    countyName            state cluster  corr   lag
##    <chr>                 <chr> <fct>   <dbl> <int>
##  1 3 - Hampden (MA)      25013 3       0.924     0
##  2 3 - San Juan (NM)     35045 3       0.836     0
##  3 4 - Baltimore cityMD) 24510 4       0.535     0
##  4 4 - Coconino (AZ)     04005 4       0.265     0
##  5 4 - Hendricks (IN)    18063 4       0.467     0
##  6 4 - Johnson (IN)      18081 4       0.527     0
##  7 4 - Marion (IN)       18097 4       0.544     0
##  8 4 - Montgomery (AL)   01101 4       0.761     0
##  9 4 - Montgomery (MD)   24031 4       0.895     0
## 10 4 - Oakland (MI)      26125 4       0.478     0
## # ... with 90 more rows
glimpse(top100_dpm_20201003)
## Observations: 21,200
## Variables: 9
## Groups: countyName, state, cluster [100]
## $ countyName <chr> "1 - Anderson (SC)", "1 - Highlands (FL)", "1 - Webb (TX...
## $ state      <chr> "45007", "12055", "48479", "34003", "36005", "48061", "2...
## $ cluster    <fct> 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,...
## $ date       <date> 2020-03-01, 2020-03-01, 2020-03-01, 2020-03-01, 2020-03...
## $ cpm7       <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.000000...
## $ dpm7       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ corr       <dbl> 0.9199305, 0.7872620, 0.8755114, 0.8252094, 0.9747650, 0...
## $ lag        <int> 26, 28, 30, 9, 6, 11, 8, 7, 9, 0, 2, 8, 26, 8, 6, 8, 10,...
## $ cpmlag     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, N...
# STEP 7b: Top-3 by population by cluster with plots
exploreTopCounties(helperACC_county_20201003, 
                   minDate="2020-03-01", 
                   maxDate="2020-09-30", 
                   topNBy="cluster",
                   nVar="pop", 
                   nKey=3, 
                   plotData=TRUE
                   )
## 
## *** KEY COUNTIES INCLUDE: ***
## # A tibble: 15 x 5
##    state cluster countyName             dpm      pop
##    <chr> <fct>   <chr>                <dbl>    <dbl>
##  1 06037 5       5 - Los Angeles (CA)  655. 10039107
##  2 17031 4       4 - Cook (IL)        1015.  5150233
##  3 48201 1       1 - Harris (TX)       551.  4713325
##  4 04013 4       4 - Maricopa (AZ)     756.  4485414
##  5 06073 2       2 - San Diego (CA)    235.  3338330
##  6 06059 1       1 - Orange (CA)       399.  3175692
##  7 12086 4       4 - Miami-Dade (FL)  1207.  2716940
##  8 48113 1       1 - Dallas (TX)       430.  2635516
##  9 36047 3       3 - Kings (NY)       2863.  2559903
## 10 36081 3       3 - Queens (NY)      3218.  2253858
## 11 53033 5       5 - King (WA)         337.  2252782
## 12 06085 2       2 - Santa Clara (CA)  167.  1927852
## 13 26163 3       3 - Wayne (MI)       1706.  1749343
## 14 06001 2       2 - Alameda (CA)      247.  1671329
## 15 39049 5       5 - Franklin (OH)     467.  1316756

## 
## The best lags are:
## # A tibble: 15 x 5
##    countyName           state cluster  corr   lag
##    <chr>                <chr> <fct>   <dbl> <int>
##  1 5 - Franklin (OH)    39049 5       0.218     0
##  2 5 - Los Angeles (CA) 06037 5       0.616     0
##  3 2 - San Diego (CA)   06073 2       0.646     2
##  4 5 - King (WA)        53033 5       0.198     2
##  5 4 - Cook (IL)        17031 4       0.836     5
##  6 3 - Kings (NY)       36047 3       0.965     8
##  7 3 - Queens (NY)      36081 3       0.941     8
##  8 1 - Dallas (TX)      48113 1       0.782     9
##  9 3 - Wayne (MI)       26163 3       0.844    10
## 10 1 - Harris (TX)      48201 1       0.740    13
## 11 4 - Maricopa (AZ)    04013 4       0.965    18
## 12 1 - Orange (CA)      06059 1       0.757    24
## 13 2 - Alameda (CA)     06001 2       0.727    24
## 14 2 - Santa Clara (CA) 06085 2       0.487    30
## 15 4 - Miami-Dade (FL)  12086 4       0.785    30
## Warning: Removed 26 rows containing missing values (geom_path).
## Warning: Removed 26 rows containing missing values (geom_path).

## # A tibble: 3,180 x 9
## # Groups:   countyName, state, cluster [15]
##    countyName           state cluster date         cpm7  dpm7  corr   lag cpmlag
##    <chr>                <chr> <fct>   <date>      <dbl> <dbl> <dbl> <int>  <dbl>
##  1 1 - Dallas (TX)      48113 1       2020-03-01 0          0 0.782     9     NA
##  2 1 - Harris (TX)      48201 1       2020-03-01 0          0 0.740    13     NA
##  3 1 - Orange (CA)      06059 1       2020-03-01 0.0900     0 0.757    24     NA
##  4 2 - Alameda (CA)     06001 2       2020-03-01 0.0855     0 0.727    24     NA
##  5 2 - San Diego (CA)   06073 2       2020-03-01 0          0 0.646     2     NA
##  6 2 - Santa Clara (CA) 06085 2       2020-03-01 1.26       0 0.487    30     NA
##  7 3 - Kings (NY)       36047 3       2020-03-01 0          0 0.965     8     NA
##  8 3 - Queens (NY)      36081 3       2020-03-01 0          0 0.941     8     NA
##  9 3 - Wayne (MI)       26163 3       2020-03-01 0          0 0.844    10     NA
## 10 4 - Cook (IL)        17031 4       2020-03-01 0.0555     0 0.836     5     NA
## # ... with 3,170 more rows
# STEP 7c: Top-5 by dpm by cluster with plots
exploreTopCounties(helperACC_county_20201003, 
                   minDate="2020-03-01", 
                   maxDate="2020-09-30", 
                   topNBy="cluster",
                   nVar="dpm", 
                   nKey=5, 
                   plotData=TRUE
                   )
## 
## *** KEY COUNTIES INCLUDE: ***
## # A tibble: 25 x 5
##    state cluster countyName            dpm     pop
##    <chr> <fct>   <chr>               <dbl>   <dbl>
##  1 36005 3       3 - Bronx (NY)      3492. 1418207
##  2 36081 3       3 - Queens (NY)     3218. 2253858
##  3 36047 3       3 - Kings (NY)      2863. 2559903
##  4 34013 3       3 - Essex (NJ)      2663.  798975
##  5 34031 3       3 - Passaic (NJ)    2499.  501826
##  6 48215 4       4 - Hidalgo (TX)    1755.  868707
##  7 04027 4       4 - Yuma (AZ)       1614.  213787
##  8 22017 4       4 - Caddo (LA)      1503.  240204
##  9 44007 4       4 - Providence (RI) 1398.  638931
## 10 45041 4       4 - Florence (SC)   1381.  138293
## # ... with 15 more rows

## 
## The best lags are:
## # A tibble: 25 x 5
##    countyName          state cluster    corr   lag
##    <chr>               <chr> <fct>     <dbl> <int>
##  1 2 - Pulaski (AR)    05119 2        0.558      0
##  2 5 - Columbiana (OH) 39029 5       -0.0149     0
##  3 5 - Litchfield (CT) 09005 5        0.922      4
##  4 4 - Caddo (LA)      22017 4        0.210      5
##  5 3 - Bronx (NY)      36005 3        0.975      6
##  6 3 - Essex (NJ)      34013 3        0.876      7
##  7 3 - Kings (NY)      36047 3        0.965      8
##  8 3 - Queens (NY)     36081 3        0.941      8
##  9 4 - Florence (SC)   45041 4        0.856      8
## 10 3 - Passaic (NJ)    34031 3        0.810      9
## # ... with 15 more rows
## Warning: Removed 158 rows containing missing values (geom_path).
## Warning: Removed 158 rows containing missing values (geom_path).

## # A tibble: 5,300 x 9
## # Groups:   countyName, state, cluster [25]
##    countyName         state cluster date        cpm7  dpm7  corr   lag cpmlag
##    <chr>              <chr> <fct>   <date>     <dbl> <dbl> <dbl> <int>  <dbl>
##  1 1 - Anderson (SC)  45007 1       2020-03-01     0     0 0.920    26     NA
##  2 1 - Highlands (FL) 12055 1       2020-03-01     0     0 0.787    28     NA
##  3 1 - Marion (FL)    12083 1       2020-03-01     0     0 0.904    27     NA
##  4 1 - Newton (GA)    13217 1       2020-03-01     0     0 0.781    26     NA
##  5 1 - Webb (TX)      48479 1       2020-03-01     0     0 0.876    30     NA
##  6 2 - Cabarrus (NC)  37025 2       2020-03-01     0     0 0.420    20     NA
##  7 2 - DeKalb (IL)    17037 2       2020-03-01     0     0 0.428    15     NA
##  8 2 - Douglas (NE)   31055 2       2020-03-01     0     0 0.539    18     NA
##  9 2 - Pulaski (AR)   05119 2       2020-03-01     0     0 0.558     0      0
## 10 2 - Warren (KY)    21227 2       2020-03-01     0     0 0.596    16     NA
## # ... with 5,290 more rows

CDC All-Cause Deaths

The CDC maintain public data for all-cause deaths by week and jurisdiction, available at CDC Weekly Deaths by Jurisdiction.

These data are known to have a lag between death and reporting, and the CDC back-correct to report deaths at the time the death occurred even if the death is reported in following weeks. This means totals for recent weeks tend to run low (lag), and the CDC run a projection of the expected total number of deaths given the historical lag times. Per other analysts on the internet, there is currently significant supra-lag, with lag times much longer than historical averages causing CDC projected deaths for recent weeks to be low.

Key Functions (CDC All-Cause Deaths)

Functions for reading and analyzing CDC all-cause deaths data include:

helperCDCFields <- function(x, df=cdcRenamed) {
    
    df %>%
        group_by_at(x) %>%
        summarize(n=n(), n_deaths_na=sum(is.na(deaths)), deaths=sum(deaths, na.rm=TRUE))
    
}


# Function to subset CDC data
subsetCDC <- function(df, 
                      critFilter=vector("list", 0), 
                      showPlot=TRUE, 
                      plotTitle=NULL, 
                      curYear=2020, 
                      prevYears=paste0("2015-", curYear-1)
                      ) {

    # FUNCTION ARGUMENTS:
    # df: the data frame containing the processed CDC data
    # critFilter: a filtering list of the form variable=c(possibleValues)
    # showPlot: boolean, whether to show plots of deaths by week and year
    # plotTitle: the title to be used for the plot (NULL will use a default title)
    # curYear: the current year (will be excluded from the mean and standard deviation plot)
    
    # Filter such that only matches to critFilter are included
    for (xNum in seq_len(length(critFilter))) {
        df <- df %>%
            filter_at(vars(all_of(names(critFilter)[xNum])), ~. %in% critFilter[[xNum]])
    }
    
    # Create a default title if none provided
    if (is.null(plotTitle)) plotTitle <- "All-cause deaths for filtered cohort"
    
    # All deaths by week and year for the filtered cohort
    allDeath <- df %>%
        group_by(year, week) %>%
        summarize(deaths=sum(deaths)) %>%
        ungroup() %>%
        mutate(weekfct=factor(week), yearint=as.integer(as.character(year)))

    # Show plots if requested
    if (showPlot) {
        # Plot of all deaths for the filtered cohort by week and year
        p1 <- allDeath %>%
            ggplot(aes(x=week, y=deaths, color=year, group=year)) + 
            geom_line() + 
            ylim(c(0, NA)) + 
            labs(x="Week", 
                 y="Deaths", 
                 title=plotTitle
                 )
        print(p1)
        
        # Plot of mean and standard deviation from previous years
        p2 <- allDeath %>%
            filter(year != curYear) %>%
            group_by(week) %>%
            summarize(mean=mean(deaths), sd=sd(deaths)) %>%
            ggplot(aes(x=week)) + 
            geom_line(aes(y=mean), color="red", size=2) + 
            geom_ribbon(aes(ymin=mean-sd, ymax=mean+sd), fill="pink", alpha=0.5) +
            labs(x="Week", 
                 y="Deaths", 
                 title=paste0(prevYears, " ", plotTitle), 
                 subtitle=paste0("Line is ", 
                                 prevYears, 
                                 " annual mean deaths, ribbon is +/- 1 standard deviation"
                                 )
                 ) + 
            ylim(c(0, NA))
        print(p2)

    }

    # Return the processed data frame
    allDeath
    
}


# Function to run the linear model to predict CDC deaths by week and year
cdcRegression <- function(df, 
                          curYear=2020, 
                          startYear=2015
                          ) {
    
    # FUNCTION ARGUMENTS:
    # df: the data frame containing the processed CDC data
    # curYear: the current year
    # startYear: the year that is defined as the zero-point for the regression
    
    # Very basic linear model for deaths vs. week and year
    df %>%
        filter(year != curYear) %>%
        lm(deaths ~ weekfct + I(yearint-startYear) + 0, data=.)
    
}


# Function to make predictions based on the regression
cdcPrediction <- function(df, 
                          lmReg
                          ) {
    
    # FUNCTION ARGUMENTS:
    # df: the data frame or tibble containing the data
    # lmReg: the linear regression model
    
    # Predictions and deltas
    allPred <- df %>%
        mutate(pred=predict(lmReg, newdata=.), delta=(deaths-pred)) %>%
        group_by(year) %>%
        arrange(week) %>%
        mutate(cumDelta=cumsum(delta), cumPred=cumsum(pred)) %>%
        ungroup()

    # Return the predictions and deltas    
    allPred

}


# Function to make plots of the predictions
cdcPredictedvsActual <- function(df, 
                                 cohortName,
                                 curYear=2020, 
                                 prevYears=paste0("2015-", curYear-1), 
                                 startWeek=9, 
                                 showCurrentPredvsPrev=TRUE, 
                                 showActualsvsPred=TRUE,
                                 showActualsvsPredRatio=TRUE, 
                                 showExcessDeaths=TRUE, 
                                 predActualPlotsOnePage=FALSE
                                 ) {
    
    # FUNCTION ARGUMENTS:
    # df: the data frame containing actual and predicted deaths
    # cohortName: cohort name to use for plot titles
    # curYear: the current year
    # prevYears: the previous years
    # startWeek: week to use for starting the counts of cumulative excess deaths
    # showCurrentPredvsPrev: boolean, whether to plot current year predictions vs. previous years
    # showActualsvsPred: boolean, whether to plot actuals vs. predictions by year
    # showActualsvsPredRatio: boolean, whether to plot actuals vs. predictions ratios by yrea
    # showExcessDeaths: boolean, whether to plot estimated excess deaths
    # predActualPlotsOnePage: boolean, whether to place the predicted vs. actual plots all on one page
    
    if (showCurrentPredvsPrev) {
        # Current year predictions vs. previous year actuals
        p1 <- df %>%
            ggplot(aes(x=week)) + 
            geom_line(data=~filter(., year != curYear), aes(y=deaths, color=year, group=year)) + 
            geom_line(data=~filter(., year == curYear), aes(y=pred), lty=2, size=1) +
            ylim(c(0, NA)) + 
            labs(x="Week", 
                 y="Deaths", 
                 title=paste0("All-cause deaths for ", cohortName, " cohort"), 
                 subtitle=paste0("Solid lines are actual ", 
                                 prevYears, 
                                 ", dashed line is predicted ", 
                                 curYear
                                 )
                 )
        if (!predActualPlotsOnePage) print(p1)
    }
    
    if (showActualsvsPred) {
        # Plot of predictions and actuals facetted by year - deaths
        p2 <- df %>%
            select(year, week, deaths, pred) %>%
            pivot_longer(c(deaths, pred)) %>%
            mutate(name=ifelse(name=="deaths", "Actual", "Predicted")) %>%
            ggplot(aes(x=week)) + 
            geom_line(aes(y=value, color=name, group=name)) + 
            ylim(c(0, NA)) + 
            facet_wrap(~year) + 
            labs(x="Week", 
                 y="Deaths", 
                 title=paste0("All-cause deaths for ", cohortName, " cohort"), 
                 subtitle=paste0("Predictions are based on a simple linear model for ", prevYears, " data")
                 ) + 
            scale_color_discrete("Deaths")
        if (!predActualPlotsOnePage) print(p2)
    }
    
    if (showActualsvsPredRatio) {
        # Plot of predictions and actuals facetted by year - ratios
        p3 <- df %>%
            mutate(rat=deaths/pred) %>%
            ggplot(aes(x=week)) + 
            geom_line(aes(y=rat)) + 
            facet_wrap(~year) + 
            geom_hline(yintercept=1, lty=2) +
            labs(x="Week", 
                 y="Ratio (Actual vs. Predicted Deaths)", 
                 title=paste0("Actual vs. predicted deaths for ", cohortName, " cohort"), 
                 subtitle=paste0("Predictions are based on a simple linear model for ", prevYears, " data")
                 )
        if (!predActualPlotsOnePage) print(p3)
    }
    
    if (showExcessDeaths) {
        # Plot of excess deaths after a starting week
        p4 <- df %>%
            select(-cumDelta, -cumPred) %>%
            filter(week >= startWeek) %>%
            group_by(year) %>%
            arrange(week) %>%
            mutate(cumDelta=cumsum(delta)) %>%
            ggplot(aes(x=week)) + 
            geom_line(aes(y=cumDelta)) + 
            facet_wrap(~year) + 
            geom_hline(yintercept=0, lty=2) +
            labs(x="Week", 
                 y=paste0("Excess deaths vs. prediction (cumulative beginning week ", startWeek, ")"), 
                 title=paste0("Cumulative estimated excess deaths starting week ", 
                              startWeek, 
                              " for ", 
                              cohortName, 
                              " cohort"
                              ), 
                 subtitle=paste0("Predictions are based on a simple linear model for ", prevYears, " data")
                 )
        if (!predActualPlotsOnePage) print(p4)
    }
    
    # Create a one-page summary
    if (predActualPlotsOnePage) {
        if (!exists("p1", inherits=FALSE)) p1 <- ggplot()
        if (!exists("p2", inherits=FALSE)) p2 <- ggplot()
        if (!exists("p3", inherits=FALSE)) p3 <- ggplot()
        if (!exists("p4", inherits=FALSE)) p4 <- ggplot()
        gridExtra::grid.arrange(p1, p2, p3, p4, nrow=2)
    }

}


# Integrated function with all steps
cdcCohortAnalysis <- function(cohortName, 
                              df=cdcProcessed, 
                              critFilter=vector("list", 0), 
                              plotTitle=NULL, 
                              curYear=2020, 
                              startYear=2015,
                              prevYears=paste0(startYear, "-", curYear-1), 
                              startWeek=9, 
                              showSubsetPlots=TRUE, 
                              showPredActualPlots=TRUE, 
                              predActualPlotsOnePage=FALSE
                              ) {
    
    # FUNCTION ARGUMENTS:
    # cohortName: the name of the cohort
    # df: the processed CDC death data
    # critFilter: the filtering criteria in the form of a named list "variable"=c("possibleValues")
    # plotTitle: title to be used for plot in subsetCDC()
    # curYear: the current year for the death data
    # startYear: the starting year for the death data
    # prevYears: the previous years for the death data (used for making predictions)
    # startWeek: the starting week for counting excess deaths
    # showSubsetPlots: boolean, whether to show the two plots in subsetCDC()
    # showPredActualPlots: boolean, whether to show the four plots in cdcPredictedvsActual()
    # predActualPlotsOnePage: boolean, whether to place the predicted vs. actual plots all on one page
    
    # Subset the data (will show both plots by default)
    allDeath_cohort <- subsetCDC(df=df, 
                                 critFilter=critFilter,
                                 showPlot=showSubsetPlots,
                                 plotTitle=plotTitle, 
                                 curYear=curYear,
                                 prevYears=prevYears
                                 )

    # Create the linear model for the subsetted data
    lm_cohort <- cdcRegression(df=allDeath_cohort, curYear=curYear, startYear=startYear)

    # Apply the predictions to the data
    allPred_cohort <- cdcPrediction(df=allDeath_cohort, lmReg=lm_cohort)

    # Plots for predicted vs. actual (will show all plots by default)
    cdcPredictedvsActual(df=allPred_cohort, 
                         cohortName=cohortName, 
                         curYear=curYear,
                         prevYears=prevYears,
                         startWeek=startWeek, 
                         showCurrentPredvsPrev=showPredActualPlots,
                         showActualsvsPred=showPredActualPlots,
                         showActualsvsPredRatio=showPredActualPlots,
                         showExcessDeaths=showPredActualPlots, 
                         predActualPlotsOnePage=predActualPlotsOnePage
                         )

    # Return a list containing the lm model and the filtered data
    list(lmReg=lm_cohort, preds=allPred_cohort)
    
}


# Function to read and process raw CDC all-cause deaths data
readProcessCDC <- function(fName, 
                           weekThru,
                           periodKeep="2015-2019",
                           fDir="./RInputFiles/Coronavirus/",
                           col_types="ccciicdcccc", 
                           renameVars=c("Week Ending Date"="weekEnding", 
                                       "State Abbreviation"="state", 
                                       "Year"="year", 
                                       "Week"="week", 
                                       "Age Group"="age", 
                                       "Number of Deaths"="deaths", 
                                       "Time Period"="period", 
                                       "Type"="type"
                                       )
                           ) {
    
    # FUNCTION ARGUMENTS:
    # fName: name of the downloaded CDC data file
    # weekThru: any record where week is less than or equal to weekThru will be kept
    # periodKeep: any record where period is in periodKeep will be kept
    # fDir: directory name for the downloaded CDC data file
    # col_types: variable type by column in the CDC data (passed to readr::read_csv())
    # renameVars: named vector for variable renaming of type c("Existing Name"="New Name")
    
    # STEP 1: Read the CSV data
    cdcRaw <- readr::read_csv(paste0(fDir, fName), col_types=col_types)
    glimpse(cdcRaw)
    
    # STEP 2: Rename the variables for easier interpretation
    cdcRenamed <- cdcRaw
    names(cdcRenamed) <- ifelse(is.na(renameVars[names(cdcRenamed)]), 
                                names(cdcRenamed), 
                                renameVars[names(cdcRenamed)]
                                )
    cdcRenamed <- cdcRenamed %>%
        mutate(weekEnding=lubridate::mdy(weekEnding))
    glimpse(cdcRenamed)

    # STEP 2a: Check control levels for key variables in renamed file
    cat("\nCheck Control Levels and Record Counts for Renamed Data:\n")
    helperCDCFields("age", df=cdcRenamed) %>% print()
    helperCDCFields(c("period", "year", "type"), df=cdcRenamed) %>% print()
    helperCDCFields(c("period", "Suppress"), df=cdcRenamed) %>% print()
    helperCDCFields(c("period", "Note"), df=cdcRenamed) %>% print()
    helperCDCFields(c("state", "Jurisdiction"), df=cdcRenamed) %>% 
        arrange(-deaths) %>% 
        as.data.frame() %>% 
        print()
    p1 <- helperCDCFields(c("year", "week"), df=cdcRenamed) %>% 
        ggplot(aes(x=week, y=deaths, group=factor(year), color=factor(year))) + 
        geom_line() + 
        labs(y="Deaths (multiple counting on several dimensions", x="Epidemiological Week") + 
        scale_color_discrete("Year") +
        ylim(c(0, NA))
    print(p1)

    # STEP 3: Convert to factored data
    cdcFactored <- cdcRenamed %>%
        mutate(age=factor(age, levels=c("Under 25 years", "25-44 years", "45-64 years", 
                                        "65-74 years", "75-84 years", "85 years and older"
                                        )
                          ), 
               period=factor(period, levels=c("2015-2019", "2020")), 
               year=factor(year, levels=2015:2020)
               )
    glimpse(cdcFactored)
    
    # STEP 4: Filter the data to include only weighted deaths and only through the desired time period
    cdcFiltered <- cdcFactored %>%
        filter(type=="Predicted (weighted)", period %in% periodKeep | week <= weekThru)
    glimpse(cdcFiltered)
    
    # STEP 4a: Check that all suppressed data and NA deaths have been eliminated
    cat("\nConfirm that data suppressions and NA deaths have been aliminated:\n")
    cdcFiltered %>%
        filter(!is.na(Suppress) | is.na(deaths)) %>%
        print()

    # STEP 5: Delete the US record and convert YC to be part of NY
    cdcProcessed <- cdcFiltered %>%
        filter(!(state %in% c("US", "PR"))) %>%
        mutate(state=ifelse(state=="YC", "NY", state), 
               Jurisdiction=ifelse(state %in% c("NY", "YC"), "New York State (NY plus YC)", Jurisdiction)
               ) %>%
        group_by(Jurisdiction, weekEnding, state, year, week, age, period, type, Suppress) %>%
        arrange(!is.na(Note)) %>%
        summarize(n=n(), deaths=sum(deaths), Note=first(Note)) %>%
        ungroup()
    glimpse(cdcProcessed)

    # STEP 5a: Confirm that there are no duplicates in the final data file
    firstDup <- cdcProcessed %>%
        select(state, year, week, age) %>%
        anyDuplicated()
    cat("\nFirst duplicate is in row number (0 means no duplicates):", firstDup)

    # STEP 6: Return the processed data file
    cdcProcessed
    
}


# Basic plots of the CDC data, including by a state-level cluster (passed as argument)
cdcBasicPlots <- function(df, 
                          weekThru=NULL, 
                          curYear=NULL, 
                          clustVec=NULL
                          ) {
    
    # FUNCTION ARGUMENTS:
    # df: a processed CDC data file
    # weekThru: week of the current year that data are thru (NULL means infer from data)
    # curYear: current year (NULL means infer from data)
    # clustVec: clustering vector with names as state abbreviations (NULL means no plots by cluster)
    
    # Get the week and year if passed as NULL
    if (is.null(curYear)) { curYear <- df %>% pull(year) %>% as.character() %>% as.integer() %>% max() }
    if (is.null(weekThru)) { weekThru <- df %>% filter(year==curYear) %>% pull(week) %>% max() }
    
    # Plot of total deaths by year (50 states plus DC)
    p1 <- df %>%
        group_by(year) %>%
        summarize(deaths=sum(deaths)) %>%
        ggplot(aes(x=year, y=deaths/1000)) + 
        geom_col(fill="lightblue") + 
        geom_text(aes(y=deaths/2000, label=round(deaths/1000))) + 
        labs(title="CDC Deaths for 50 states plus DC", 
             x="", 
             y="Deaths (000s)", 
             subtitle=paste0("Through week ", weekThru, " of ", curYear)
             )
    print(p1)

    # Plot of total deaths by week by year
    p2 <- df %>%
        group_by(year, week) %>%
        summarize(deaths=sum(deaths)) %>%
        ungroup() %>%
        ggplot(aes(x=week, y=deaths, group=year, color=year)) + 
        geom_line() +
        ylim(c(0, NA)) + 
        labs(x="Week of Year", y="Deaths", title="US deaths per week by year")
    print(p2)
    
    # Plot of total deaths by year by age cohort
    p3 <- df %>%
        group_by(year, week, age) %>%
        summarize(deaths=sum(deaths)) %>%
        ungroup() %>%
        ggplot(aes(x=week, y=deaths, group=year, color=year)) + 
        geom_line() +
        ylim(c(0, NA)) + 
        facet_wrap(~age) +
        labs(x="Week of Year", y="Deaths", title="US deaths per week by year")
    print(p3)

    # Plots of total deaths by week by year by cluster
    if (!is.null(clustVec)) {
        p4 <- df %>%
            mutate(cluster=clustVec[state]) %>%
            group_by(year, week, cluster) %>%
            summarize(deaths=sum(deaths)) %>%
            ungroup() %>%
            ggplot(aes(x=week, y=deaths, group=year, color=year)) + 
            geom_line() +
            ylim(c(0, NA)) + 
            facet_wrap(~cluster) +
            labs(x="Week of Year", 
                 y="Deaths", 
                 title="US deaths per week by year", 
                 subtitle="Facetted by state cluster"
                 )
        print(p4)
        
        p5 <- df %>%
            mutate(cluster=clustVec[state]) %>%
            filter(year==curYear) %>%
            group_by(year, age, week, cluster) %>%
            summarize(deaths=sum(deaths)) %>%
            ungroup() %>%
            ggplot(aes(x=week, y=deaths, fill=age)) + 
            geom_col(position="stack") +
            ylim(c(0, NA)) + 
            facet_wrap(~cluster) +
            labs(x="Week of Year", 
                 y="Deaths", 
                 title=paste0("US deaths per week (Year ", curYear, ")"), 
                 subtitle="Facetted by state cluster"
                 )
        print(p5)
    }
    
}

Running Key Analyses (CDC All-Cause Deaths)

The CDC data can be loaded and analyzed:

# Read and process the CDC data
cdc20200923 <- readProcessCDC("Weekly_counts_of_deaths_by_jurisdiction_and_age_group_downloaded_20200923.csv", 
                              weekThru=30
                              )
## Observations: 174,311
## Variables: 11
## $ Jurisdiction         <chr> "Alabama", "Alabama", "Alabama", "Alabama", "A...
## $ `Week Ending Date`   <chr> "1/10/2015", "1/17/2015", "1/24/2015", "1/31/2...
## $ `State Abbreviation` <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL"...
## $ Year                 <int> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015...
## $ Week                 <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...
## $ `Age Group`          <chr> "25-44 years", "25-44 years", "25-44 years", "...
## $ `Number of Deaths`   <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50...
## $ `Time Period`        <chr> "2015-2019", "2015-2019", "2015-2019", "2015-2...
## $ Type                 <chr> "Predicted (weighted)", "Predicted (weighted)"...
## $ Suppress             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## Observations: 174,311
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <int> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <chr> "25-44 years", "25-44 years", "25-44 years", "25-44 ye...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <chr> "2015-2019", "2015-2019", "2015-2019", "2015-2019", "2...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## Check Control Levels and Record Counts for Renamed Data:
## # A tibble: 6 x 4
##   age                    n n_deaths_na   deaths
##   <chr>              <int>       <int>    <dbl>
## 1 25-44 years        26066           3  3170502
## 2 45-64 years        31884           7 12464677
## 3 65-74 years        31880          10 12347549
## 4 75-84 years        31902          11 15363924
## 5 85 years and older 31888           9 20067928
## 6 Under 25 years     20691           0  1372225
## # A tibble: 12 x 6
## # Groups:   period, year [6]
##    period     year type                     n n_deaths_na  deaths
##    <chr>     <int> <chr>                <int>       <int>   <dbl>
##  1 2015-2019  2015 Predicted (weighted) 15285           0 5416393
##  2 2015-2019  2015 Unweighted           15285           0 5416393
##  3 2015-2019  2016 Predicted (weighted) 15365           0 5483764
##  4 2015-2019  2016 Unweighted           15365           0 5483764
##  5 2015-2019  2017 Predicted (weighted) 15318           0 5643350
##  6 2015-2019  2017 Unweighted           15318           0 5643350
##  7 2015-2019  2018 Predicted (weighted) 15305           0 5698002
##  8 2015-2019  2018 Unweighted           15305           0 5698002
##  9 2015-2019  2019 Predicted (weighted) 15319           0 5725516
## 10 2015-2019  2019 Unweighted           15319           0 5725516
## 11 2020       2020 Predicted (weighted) 10586          24 4476340
## 12 2020       2020 Unweighted           10541          16 4376415
## # A tibble: 3 x 5
## # Groups:   period [2]
##   period    Suppress                                       n n_deaths_na  deaths
##   <chr>     <chr>                                      <int>       <int>   <dbl>
## 1 2015-2019 <NA>                                      153184           0  5.59e7
## 2 2020      Suppressed (counts highly incomplete, <5~     40          40  0.    
## 3 2020      <NA>                                       21087           0  8.85e6
## # A tibble: 9 x 5
## # Groups:   period [2]
##   period   Note                                            n n_deaths_na  deaths
##   <chr>    <chr>                                       <int>       <int>   <dbl>
## 1 2015-20~ <NA>                                       153184           0  5.59e7
## 2 2020     Data in recent weeks are incomplete. Only~  16591           0  7.27e6
## 3 2020     Data in recent weeks are incomplete. Only~    324           0  1.52e5
## 4 2020     Data in recent weeks are incomplete. Only~    288          30  3.05e4
## 5 2020     Data in recent weeks are incomplete. Only~   1502          10  4.17e5
## 6 2020     Data in recent weeks are incomplete. Only~     60           0  2.71e4
## 7 2020     Estimates for Pennsylvania are too low fo~     48           0  2.23e4
## 8 2020     Weights may be too low to account for und~    436           0  1.40e5
## 9 2020     <NA>                                         1878           0  7.97e5
##    state         Jurisdiction    n n_deaths_na   deaths
## 1     US        United States 3552           0 32276762
## 2     CA           California 3552           0  3072016
## 3     FL              Florida 3552           0  2357528
## 4     TX                Texas 3552           0  2314502
## 5     PA         Pennsylvania 3552           0  1548716
## 6     OH                 Ohio 3552           0  1395014
## 7     IL             Illinois 3552           0  1219119
## 8     NY             New York 3552           0  1155790
## 9     MI             Michigan 3552           0  1111203
## 10    NC       North Carolina 3521          17  1051632
## 11    GA              Georgia 3551           0   965504
## 12    NJ           New Jersey 3546           0   867210
## 13    TN            Tennessee 3552           0   840787
## 14    VA             Virginia 3552           0   774383
## 15    IN              Indiana 3550           0   749760
## 16    MO             Missouri 3548           0   728220
## 17    MA        Massachusetts 3516           0   685409
## 18    AZ              Arizona 3552           0   684537
## 19    YC        New York City 3548           0   671106
## 20    WA           Washington 3551           0   645406
## 21    AL              Alabama 3550           0   598526
## 22    WI            Wisconsin 3533           0   592047
## 23    MD             Maryland 3546           0   570238
## 24    SC       South Carolina 3549           0   560415
## 25    KY             Kentucky 3519           0   545032
## 26    LA            Louisiana 3545           0   525668
## 27    MN            Minnesota 3509           0   503567
## 28    CO             Colorado 3550           0   446708
## 29    OK             Oklahoma 3541           0   445362
## 30    OR               Oregon 3382           0   413553
## 31    MS          Mississippi 3488           0   363792
## 32    AR             Arkansas 3444           0   361612
## 33    CT          Connecticut 3106          13   356416
## 34    IA                 Iowa 3190           0   339791
## 35    PR          Puerto Rico 3272           0   331654
## 36    KS               Kansas 3246           0   296520
## 37    NV               Nevada 3291           0   289275
## 38    WV        West Virginia 3011          10   251046
## 39    UT                 Utah 3438           0   213931
## 40    NM           New Mexico 3140           0   205026
## 41    NE             Nebraska 2846           0   188492
## 42    ME                Maine 2646           0   160341
## 43    ID                Idaho 2766           0   152936
## 44    NH        New Hampshire 2672           0   135110
## 45    HI               Hawaii 2556           0   124379
## 46    RI         Rhode Island 2474           0   114274
## 47    MT              Montana 2556           0   109719
## 48    DE             Delaware 2558           0    99625
## 49    SD         South Dakota 2448           0    86461
## 50    ND         North Dakota 2433           0    75079
## 51    DC District of Columbia 2545           0    64202
## 52    VT              Vermont 2336           0    61925
## 53    WY              Wyoming 2318           0    47230
## 54    AK               Alaska 2352           0    42249

## Observations: 174,311
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <fct> 25-44 years, 25-44 years, 25-44 years, 25-44 years, 25...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## Observations: 85,466
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <fct> 25-44 years, 25-44 years, 25-44 years, 25-44 years, 25...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## Confirm that data suppressions and NA deaths have been aliminated:
## # A tibble: 0 x 11
## # ... with 11 variables: Jurisdiction <chr>, weekEnding <date>, state <chr>,
## #   year <fct>, week <int>, age <fct>, deaths <dbl>, period <fct>, type <chr>,
## #   Suppress <chr>, Note <chr>
## Observations: 80,379
## Variables: 12
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-10, 2015-01-10, 2015-01-10, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, ...
## $ age          <fct> Under 25 years, 25-44 years, 45-64 years, 65-74 years,...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ n            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ deaths       <dbl> 25, 67, 253, 202, 272, 320, 28, 49, 256, 222, 253, 332...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## First duplicate is in row number (0 means no duplicates): 0
# Generate plots of the processed CDC data
cdcBasicPlots(cdc20200923, clustVec=test_hier5$useClusters)

# Example cohort analysis for age 65+
list_65plus <- cdcCohortAnalysis(cohortName="65+ years old", 
                                 df=cdc20200923,
                                 critFilter=list("age"=c("65-74 years", 
                                                         "75-84 years", 
                                                         "85 years and older"
                                                         )
                                                 ), 
                                 curYear=2020, 
                                 startYear=2015,
                                 startWeek=9,
                                 plotTitle="All-cause deaths for 65+ years old cohort", 
                                 predActualPlotsOnePage=TRUE
                                 )

# Example cohort analysis for full US
list_allUS <- cdcCohortAnalysis(cohortName="all ages, all states", 
                                df=cdc20200923,
                                curYear=2020, 
                                startYear=2015,
                                startWeek=9,
                                plotTitle="All-cause US total deaths",
                                predActualPlotsOnePage=TRUE
                                )

# Example cohort analysis for handful of state hit early
list_early <- cdcCohortAnalysis(cohortName="all ages, NY/NJ/CT/MA", 
                                df=cdc20200923,
                                critFilter=list("state"=c("NY", "NJ", "CT", "MA")),
                                curYear=2020, 
                                startYear=2015,
                                startWeek=9,
                                plotTitle="All-cause total deaths for NY/NJ/CT/MA",
                                predActualPlotsOnePage=TRUE
                                )

Next steps are to continue with the CDC aggregates function (analyze by cluster or age or state ot etc.):

# Function to create data and plots for an aggregate (cluster in this case)
cdcAggregateSummary <- function(df, 
                                critVar, 
                                critSubsets, 
                                startWeek,
                                idVarName=critVar,
                                curYear=2020, 
                                startYear=2015,
                                showPlots=TRUE
                                ) {
    
    # FUNCTION ARGUMENTS:
    # df: the data frame containing the processed CDC all-cause deaths data
    # critVar: the main variable supplied to critFilter
    # critSubsets: the subsets used for critVar
    # startWeek: the starting week for analysis (will assume no excess deaths prior to startWeek or curYear)
    # idVarName: name to be used for the identifying variable when excess deaths data are combined
    # curYear: current year of CDC deaths data
    # startYear: starting year of CDC deaths data
    # showPlots: boolean, whether to show a 1-page plot summary for each element of critSubsets
    
    tmpList <- vector("list", length(critSubsets))
    
    for (ctr in seq_along(critSubsets)) {
        critUse <- list(critSubsets[[ctr]])
        names(critUse) <- critVar
        tmpList[[ctr]] <- cdcCohortAnalysis(cohortName=paste0("Iteration: ", ctr), 
                                            df=df, 
                                            critFilter=critUse, 
                                            plotTitle="",
                                            curYear=curYear,
                                            startYear=startYear,
                                            startWeek=startWeek, 
                                            showSubsetPlots=FALSE, 
                                            showPredActualPlots=showPlots,
                                            predActualPlotsOnePage=showPlots
                                            )
    }
    
    # Create the excess file
    excessFull <- map_dfr(.x=tmpList, .f=function(x) { x[["preds"]] }, .id=idVarName)
    excessFull

    
}

cdcAggregateSummary(df=cdc20200923, critVar="state", critSubsets=list(c("NY", "NJ"), c("FL", "GA")), startWeek=9, idVarName="cluster")


# Attach population by cluster
excessCluster <- stateData %>%
    mutate(cluster=clustVec[state]) %>%
    group_by(cluster) %>%
    summarize(pop=sum(pop)) %>%
    right_join(excessCluster, by="cluster") %>%
    mutate(cluster=as.character(cluster))
excessCluster

# Plot of excess deaths by week by cluster
excessCluster %>%
    filter(year==2020) %>%
    ggplot(aes(x=week, y=delta)) + 
    geom_line(aes(group=cluster, color=cluster)) + 
    labs(title="Actual minus predicted deaths by cluster and week (2020)", 
         x="Week", 
         y="Actual minus predicted deaths"
         ) + 
    scale_color_discrete("Cluster")

# Plot of excess deaths per million by week by cluster
excessCluster %>%
    filter(year==2020) %>%
    ggplot(aes(x=week, y=1000000*delta/pop)) + 
    geom_line(aes(group=cluster, color=cluster)) + 
    labs(title="Actual minus predicted deaths by cluster and week (2020)", 
         subtitle="Per million people, per week",
         x="Week", 
         y="Actual minus predicted deaths (per million people)"
         ) + 
    scale_color_discrete("Cluster") + 
    geom_hline(aes(yintercept=0), lty=2)


# Explore the conversion of week by week ending data in the raw CDC data
cdcRaw %>% 
    filter(Year==2020) %>% 
    count(Week, `Week Ending Date`) %>%
    as.data.frame()

# Pull the cluster-level data and create a CDC week number
fullDeathData <- consolidatedPlotData_20200830 %>%
    filter(name=="deaths", state=="cluster") %>%
    ungroup() %>%
    mutate(week=lubridate::epiweek(date), cluster=as.character(cluster)) %>%
    group_by(cluster, week) %>%
    summarize(cvDeaths=sum(value)) %>%
    ungroup() %>%
    right_join(excessCluster, by=c("cluster", "week")) %>%
    mutate(cvDeaths=ifelse(is.na(cvDeaths), 0, cvDeaths))
fullDeathData

# Plot for total excess deaths and reported coronavirus deaths by cluster by week
fullDeathData %>%
    filter(year==2020) %>%
    select(cluster, week, cvDeaths, pop, delta) %>%
    pivot_longer(-c(cluster, week, pop), names_to="source", values_to="deaths") %>%
    ggplot(aes(x=week, y=deaths)) + 
    geom_line(aes(group=source, color=c("cvDeaths"="Reported COVID", "delta"="Excess All-Cause")[source])) + 
    facet_wrap(~cluster) + 
    scale_color_discrete("Deaths")


listStates_v001 <- vector("list", 51)

for (abbState in names(clustVec)) {
    listStates_v001[[abbState]] <- cdcCohortAnalysis(cohortName=paste0("all ages, state ", abbState), 
                                                     critFilter=list("state"=abbState),
                                                     plotTitle=paste0("All-cause total deaths for state ",
                                                                      abbState
                                                                      ), 
                                                     showSubsetPlots=FALSE,
                                                     showPredActualPlots=FALSE
                                                     )
}

# Create the excess cluster file
excessState_v001 <- map_dfr(.x=listStates_v001, .f=function(x) { x[["preds"]] }, .id="state")
excessState_v001


excessState_v001 %>%
    filter(year==2020) %>%
    mutate(quarter=factor(paste0("Q", ceiling(week/13), "-20"))) %>%
    group_by(state, quarter) %>%
    summarize(excess=sum(delta)) %>%
    ggplot(aes(x=fct_reorder(state, excess, .fun=sum), y=excess/1000, fill=fct_rev(quarter))) + 
    geom_col(position="stack") + 
    coord_flip() + 
    labs(x="State", 
         y="Excess Deaths (000s)", 
         title="All-cause excess deaths in 2020", 
         subtitle="CDC data through July 2020 (Q3 incomplete)"
         ) +
    scale_fill_discrete("Quarter")

excessState_v001 %>%
    filter(year==2020) %>%
    mutate(quarter=factor(paste0("Q", ceiling(week/13), "-20"))) %>%
    group_by(state, quarter) %>%
    summarize(excess=sum(delta)) %>%
    ungroup() %>%
    left_join(select(usmap::statepop, abbr, pop_2015), by=c("state"="abbr")) %>%
    mutate(excesspm=excess*1000000/pop_2015) %>%
    ggplot(aes(x=fct_reorder(state, excesspm, .fun=sum), y=excesspm, fill=fct_rev(quarter))) + 
    geom_col(position="stack") + 
    coord_flip() + 
    labs(x="State", 
         y="Excess Deaths (per million people)", 
         title="All-cause excess deaths per million people in 2020", 
         subtitle="CDC data through July 2020 (Q3 incomplete)"
         ) +
    scale_fill_discrete("Quarter")

pes_001 <- excessState_v001 %>%
    filter(year==2020, week>=10) %>%
    mutate(quarter=factor(paste0("Q", ceiling(week/13), "-20")), 
           month=factor(case_when(week <= 13 ~ "Mar", 
                                  week <= 18 ~ "Apr", 
                                  week <= 22 ~ "May", 
                                  week <= 26 ~ "Jun", 
                                  week <= 31 ~ "Jul", 
                                  TRUE ~ "Error"), 
                        levels=c("Mar", "Apr", "May", "Jun", "Jul", "Error")
                        )
           ) %>%
    group_by(state, month) %>%
    summarize(excess=sum(delta)) %>%
    ungroup() %>%
    left_join(select(usmap::statepop, abbr, pop_2015), by=c("state"="abbr")) %>%
    mutate(excesspm=excess*1000000/pop_2015) %>%
    ggplot(aes(x=fct_reorder(state, excesspm, .fun=sum))) + 
    geom_col(aes(y=excesspm, fill=fct_rev(month)), position="stack") + 
    coord_flip() + 
    labs(x="State", 
         y="Excess Deaths (per million people)", 
         title="All-cause excess deaths per million people in 2020", 
         subtitle="CDC data through July 2020", 
         caption="Epi Week to Month: Mar (10-13), Apr (14-18), May (19-22), Jun (23-26), Jul (27-31)"
         ) +
    scale_fill_discrete("Month")
pes_001

cumDeathState_001 <- consolidatedPlotData_20200830 %>% 
    filter(name=="deaths", state != "cluster") %>% 
    group_by(state) %>% 
    mutate(cumdpm=cumsum(vpm)) %>%
    filter(date %in% c(as.Date("2020-07-31"), max(date)))

pes_001 + 
    geom_point(data=filter(cumDeathState_001, date==as.Date("2020-07-31")), aes(x=state, y=cumdpm), size=2) + 
    labs(title="Deaths per million people using data through July 2020", 
         subtitle="Bars based on CDC all-cause deaths, points are reported coronavirus deaths", 
         y="Deaths (per million people) through July 2020"
         )

excessState_v001 %>%
    filter(year==2020, week >= 10) %>%
    group_by(state) %>%
    summarize(delta=sum(delta)) %>%
    left_join(select(usmap::statepop, abbr, pop_2015), by=c("state"="abbr")) %>%
    mutate(deltapm=delta*1000000/pop_2015) %>%
    inner_join(select(filter(cumDeathState_001, date==as.Date("2020-07-31")), state, cumdpm)) %>%
    mutate(rat=deltapm/cumdpm) %>%
    ggplot(aes(x=fct_reorder(state, rat), y=rat)) + 
    geom_col(fill="lightblue") + 
    geom_text(aes(y=rat/2, label=paste0(round(100*rat), "%"))) + 
    coord_flip() + 
    labs(x="State", 
         y="Ratio of estimated all-cause excess deaths to reported coronavirus deaths", 
         title="Ratio of estimated all-cause excess deaths to reported coronavirus deaths", 
         subtitle="All data through July 2020"
         ) + 
    ylim(c(0, NA)) + 
    geom_hline(aes(yintercept=1), lty=2)

for (abbState in c("HI", "VT", "WY", "WV", "OR", "TX")) {
    cdcCohortAnalysis(cohortName=paste0("all ages, state ", abbState), 
                      critFilter=list("state"=abbState),
                      plotTitle=paste0("All-cause total deaths for state ", abbState), 
                      showSubsetPlots=FALSE,
                      showPredActualPlots=TRUE,
                      predActualPlotsOnePage=TRUE
                      )
}


listAges_v001 <- vector("list", 6)

n <- 1
for (age in levels(cdcProcessed$age)) {
    listAges_v001[[n]] <- cdcCohortAnalysis(cohortName="", 
                                            critFilter=list("age"=age),
                                            plotTitle="", 
                                            showSubsetPlots=FALSE,
                                            showPredActualPlots=FALSE
                                            )
    n <- n + 1
}

names(listAges_v001) <- levels(cdcProcessed$age)

# Create the excess cluster file
excessAge_v001 <- map_dfr(.x=listAges_v001, .f=function(x) { x[["preds"]] }, .id="age")
excessAge_v001

excessAge_v001 %>%
    filter(year==2020) %>%
    mutate(quarter=factor(paste0("Q", ceiling(week/13), "-20"))) %>%
    group_by(age, quarter) %>%
    summarize(excess=sum(delta)) %>%
    ggplot(aes(x=fct_reorder(age, excess, .fun=sum), y=excess/1000, fill=fct_rev(quarter))) + 
    geom_col(position="stack") + 
    coord_flip() + 
    labs(x="Age Group", 
         y="Excess Deaths (000s)", 
         title="All-cause excess deaths in 2020", 
         subtitle="CDC data through July 2020 (Q3 incomplete)"
         ) +
    scale_fill_discrete("Quarter")

# Estimated US population by age (2020)
usPopAge2020 <- survival::uspop2[, , "2020"] %>%
    apply(1, FUN=sum) %>%
    tibble::tibble(ageActual=as.integer(names(.)), pop_2020=.)
usPopAge2020

# Sums by age bucket
usPopBucket2020 <- usPopAge2020 %>%
    mutate(age=factor(case_when(ageActual <= 24 ~ "Under 25 years", 
                                ageActual <= 44 ~ "25-44 years", 
                                ageActual <= 64 ~ "45-64 years", 
                                ageActual <= 74 ~ "65-74 years", 
                                ageActual <= 84 ~ "75-84 years", 
                                TRUE ~ "85 years and older"
                                ), levels=levels(cdcProcessed$age)
                      )
           ) %>%
    group_by(age) %>%
    summarize(pop_2020=sum(pop_2020))
usPopBucket2020

pea_001 <- excessAge_v001 %>%
    filter(year==2020, week>=10) %>%
    mutate(quarter=factor(paste0("Q", ceiling(week/13), "-20")), 
           month=factor(case_when(week <= 13 ~ "Mar", 
                                  week <= 18 ~ "Apr", 
                                  week <= 22 ~ "May", 
                                  week <= 26 ~ "Jun", 
                                  week <= 31 ~ "Jul", 
                                  TRUE ~ "Error"), 
                        levels=c("Mar", "Apr", "May", "Jun", "Jul", "Error")
                        ), 
           age=factor(age, levels=levels(cdcProcessed$age))
           ) %>%
    group_by(age, month) %>%
    summarize(excess=sum(delta)) %>%
    ungroup() %>%
    inner_join(usPopBucket2020, by="age") %>%
    mutate(excesspm=1000000*excess/pop_2020) %>%
    ggplot(aes(x=fct_reorder(age, excesspm, .fun=sum))) + 
    geom_col(aes(y=excesspm, fill=fct_rev(month)), position="stack") + 
    coord_flip() + 
    labs(x="Age Group", 
         y="Excess Deaths (per million people)", 
         title="All-cause excess deaths per million in 2020", 
         subtitle="CDC data through July 2020", 
         caption="Epi Week to Month: Mar (10-13), Apr (14-18), May (19-22), Jun (23-26), Jul (27-31)"
         ) +
    geom_text(data=. %>% group_by(age) %>% summarize(excesspm=sum(excesspm)), 
              aes(y=excesspm+100, label=round(excesspm), hjust=0)
              ) +
    scale_fill_discrete("Month")
pea_001

for (age in levels(cdcProcessed$age)) {
    cdcCohortAnalysis(cohortName=paste0("ages ", age), 
                      critFilter=list("age"=age),
                      plotTitle=paste0("All-cause total deaths for age ", age), 
                      showSubsetPlots=FALSE,
                      showPredActualPlots=TRUE,
                      predActualPlotsOnePage=TRUE
                      )
}

ageOld <- c("65-74 years", "75-84 years", "85 years and older")
cdcTwoAge <- cdcProcessed %>%
    mutate(ageSplit=factor(ifelse(age %in% ageOld, "65 and over", "Under 65"), 
                           levels=c("Under 65", "65 and over")
                           )
           ) %>%
    group_by(state, year, week, ageSplit) %>%
    summarize(deaths=sum(deaths)) %>%
    ungroup()
cdcTwoAge


listAgeState_v001 <- vector("list", 102)

n <- 1
for (age in levels(cdcTwoAge$ageSplit)) {
    for (state in state.abb) {
        listAgeState_v001[[n]] <- cdcCohortAnalysis(df=cdcTwoAge, 
                                                    cohortName="", 
                                                    critFilter=list("ageSplit"=age, "state"=state),
                                                    plotTitle="", 
                                                    showSubsetPlots=FALSE,
                                                    showPredActualPlots=FALSE
                                                    )
        n <- n + 1
    }
}

names(listAgeState_v001) <- paste(rep(levels(cdcTwoAge$ageSplit), each=51), state.abb, sep="_")

# Create the excess cluster file
excessAgeState_v001 <- map_dfr(.x=listAgeState_v001, .f=function(x) { x[["preds"]] }, .id="ageState") %>%
    tidyr::separate(ageState, into=c("age", "state"), sep="_")
excessAgeState_v001

excessAgeState_v001 %>%
    filter(year==2020) %>%
    group_by(age, state) %>%
    summarize(excess=sum(delta)) %>%
    ggplot(aes(x=fct_reorder(state, excess, .fun=sum), y=excess/1000, fill=age)) + 
    geom_col(position="fill") + 
    coord_flip() + 
    labs(x="State", 
         y="Excess Deaths (000s)", 
         title="All-cause excess deaths in 2020", 
         subtitle="CDC data through July 2020 (Q3 incomplete)"
         ) +
    scale_fill_discrete("Age Group") + 
    geom_hline(aes(yintercept=0.25), lty=2)


for (age in levels(cdcTwoAge$ageSplit)) {
    cdcCohortAnalysis(df=cdcTwoAge, 
                      cohortName=paste0("ages ", age), 
                      critFilter=list("ageSplit"=age),
                      plotTitle=paste0("All-cause total deaths for age ", age), 
                      showSubsetPlots=FALSE,
                      showPredActualPlots=TRUE,
                      predActualPlotsOnePage=TRUE
                      )
}